Guard madvise ENOMEM and stabilize pool/tiny front v3
@ -4,6 +4,7 @@
|
||||
|
||||
#include "pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_MID)
|
||||
#include "box/pool_hotbox_v2_box.h"
|
||||
#include "box/tiny_heap_env_box.h" // TinyHeap profile (C7_SAFE では flatten を無効化)
|
||||
|
||||
// Pool v2 is experimental. Default OFF (use legacy v1 path).
|
||||
static inline int hak_pool_v2_enabled(void) {
|
||||
@ -40,6 +41,12 @@ static inline int hak_pool_v2_tls_fast_enabled(void) {
|
||||
static inline int hak_pool_v1_flatten_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
// Force OFF on the safe side under the C7_SAFE / C7_ULTRA_BENCH profiles
|
||||
int mode = tiny_heap_profile_mode();
|
||||
if (mode == TINY_HEAP_PROFILE_C7_SAFE || mode == TINY_HEAP_PROFILE_C7_ULTRA_BENCH) {
|
||||
g = 0;
|
||||
return g;
|
||||
}
|
||||
const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_ENABLED");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
|
||||
86
core/box/pool_hotbox_v2_box.h
Normal file
@ -0,0 +1,86 @@
|
||||
// pool_hotbox_v2_box.h — Experimental PoolHotBox v2 (hot path scaffold)
|
||||
#ifndef POOL_HOTBOX_V2_BOX_H
|
||||
#define POOL_HOTBOX_V2_BOX_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdatomic.h>
|
||||
|
||||
#include "hakmem_pool.h" // for POOL_NUM_CLASSES and size helpers
|
||||
|
||||
// ENV gates (bench/experiment only):
//   HAKMEM_POOL_V2_ENABLED : overall ON/OFF (default OFF)
//   HAKMEM_POOL_V2_CLASSES : bitmask; bit i = 1 routes class i to HotBox v2
//   HAKMEM_POOL_V2_STATS   : stats dump ON/OFF
|
||||
|
||||
typedef struct PoolHotBoxV2Stats {
|
||||
_Atomic uint64_t alloc_calls;
|
||||
_Atomic uint64_t alloc_fast;
|
||||
_Atomic uint64_t alloc_refill;
|
||||
_Atomic uint64_t alloc_refill_fail;
|
||||
_Atomic uint64_t alloc_fallback_v1;
|
||||
_Atomic uint64_t free_calls;
|
||||
_Atomic uint64_t free_fast;
|
||||
_Atomic uint64_t free_fallback_v1;
|
||||
_Atomic uint64_t page_of_fail_header_missing;
|
||||
_Atomic uint64_t page_of_fail_out_of_range;
|
||||
_Atomic uint64_t page_of_fail_misaligned;
|
||||
_Atomic uint64_t page_of_fail_unknown;
|
||||
} PoolHotBoxV2Stats;
|
||||
|
||||
// Simple page/class structs for future HotBox v2 implementation.
|
||||
typedef struct pool_page_v2 {
|
||||
void* freelist;
|
||||
uint32_t used;
|
||||
uint32_t capacity;
|
||||
uint32_t block_size;
|
||||
uint32_t class_idx;
|
||||
void* base;
|
||||
void* slab_ref;
|
||||
struct pool_page_v2* next;
|
||||
} pool_page_v2;
|
||||
|
||||
typedef struct pool_class_v2 {
|
||||
pool_page_v2* current;
|
||||
pool_page_v2* partial;
|
||||
uint16_t max_partial_pages;
|
||||
uint16_t partial_count;
|
||||
uint32_t block_size;
|
||||
} pool_class_v2;
|
||||
|
||||
typedef struct pool_ctx_v2 {
|
||||
pool_class_v2 cls[POOL_NUM_CLASSES];
|
||||
} pool_ctx_v2;
|
||||
|
||||
typedef struct PoolColdIface {
|
||||
void* (*refill_page)(void* cold_ctx,
|
||||
uint32_t class_idx,
|
||||
uint32_t* out_block_size,
|
||||
uint32_t* out_capacity,
|
||||
void** out_slab_ref);
|
||||
void (*retire_page)(void* cold_ctx,
|
||||
uint32_t class_idx,
|
||||
void* slab_ref,
|
||||
void* base);
|
||||
} PoolColdIface;
|
||||
|
||||
// ENV helpers
|
||||
int pool_hotbox_v2_class_enabled(int class_idx);
|
||||
int pool_hotbox_v2_stats_enabled(void);
|
||||
|
||||
// TLS/context helpers
|
||||
pool_ctx_v2* pool_v2_tls_get(void);
|
||||
|
||||
// Hot path (currently stubbed to always fall back to v1; structure only)
|
||||
void* pool_hotbox_v2_alloc(uint32_t class_idx, size_t size, uintptr_t site_id);
|
||||
int pool_hotbox_v2_free(uint32_t class_idx, void* raw_block);
|
||||
|
||||
// Stats helpers
|
||||
void pool_hotbox_v2_record_free_call(uint32_t class_idx);
|
||||
void pool_hotbox_v2_record_alloc_fallback(uint32_t class_idx);
|
||||
void pool_hotbox_v2_record_free_fallback(uint32_t class_idx);
|
||||
|
||||
// Stats export (destructor in hakmem_pool.c)
|
||||
extern PoolHotBoxV2Stats g_pool_hotbox_v2_stats[POOL_NUM_CLASSES];
|
||||
|
||||
#endif // POOL_HOTBOX_V2_BOX_H
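
Note (not part of the diff): a minimal caller sketch for the HotBox v2 API above. pool_v1_alloc()/pool_v1_free() are hypothetical stand-ins for the existing legacy v1 path; everything else is declared in this header.

// Sketch only: route a class through HotBox v2 and fall back to v1 on a miss.
static void* pool_alloc_routed(int class_idx, size_t size, uintptr_t site_id) {
    if (pool_hotbox_v2_class_enabled(class_idx)) {
        void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
        if (p) return p;
        pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx); // count the miss
    }
    return pool_v1_alloc(class_idx, size, site_id); // hypothetical legacy path
}

static void pool_free_routed(int class_idx, void* raw_block) {
    if (pool_hotbox_v2_class_enabled(class_idx)) {
        if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block)) return; // handled by v2
        pool_hotbox_v2_record_free_fallback((uint32_t)class_idx);        // count the miss
    }
    pool_v1_free(class_idx, raw_block); // hypothetical legacy path
}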
|
||||
33
core/box/pool_hotbox_v2_header_box.h
Normal file
@ -0,0 +1,33 @@
|
||||
// pool_hotbox_v2_header_box.h
|
||||
// Small helpers for embedding/reading the v2 pool page pointer in the page header.
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Mask a pointer down to the page base (POOL_PAGE_SIZE is a power of two).
|
||||
static inline void* pool_hotbox_v2_page_base(void* ptr, size_t page_size) {
|
||||
return (void*)((uintptr_t)ptr & ~((uintptr_t)page_size - 1));
|
||||
}
|
||||
|
||||
// Store the PoolHotBox v2 page pointer into the page header.
|
||||
// Caller must ensure base is page_size aligned and non-NULL.
|
||||
static inline void pool_hotbox_v2_header_store(void* page_base, void* page_ptr) {
|
||||
if (!page_base) return;
|
||||
void** hdr = (void**)page_base;
|
||||
*hdr = page_ptr;
|
||||
}
|
||||
|
||||
// Clear the page header pointer (used on retire to avoid stale lookups).
|
||||
static inline void pool_hotbox_v2_header_clear(void* page_base) {
|
||||
if (!page_base) return;
|
||||
void** hdr = (void**)page_base;
|
||||
*hdr = NULL;
|
||||
}
|
||||
|
||||
// Load the page pointer from the page header (may return NULL).
|
||||
static inline void* pool_hotbox_v2_header_load(void* page_base) {
|
||||
if (!page_base) return NULL;
|
||||
void** hdr = (void**)page_base;
|
||||
return *hdr;
|
||||
}
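
Note (not part of the diff): how these helpers are expected to compose on the free path; a sketch assuming a page-aligned base and a 64KiB POOL_PAGE_SIZE (the value used elsewhere in this commit).

// Sketch only: recover the owning pool_page_v2 from an interior block pointer.
// struct pool_page_v2 is declared in pool_hotbox_v2_box.h.
static struct pool_page_v2* page_of_sketch(void* block, size_t page_size) {
    void* base = pool_hotbox_v2_page_base(block, page_size);               // mask down to page base
    return (struct pool_page_v2*)pool_hotbox_v2_header_load(base);         // NULL if never stored or cleared
}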
|
||||
|
||||
@ -41,6 +41,22 @@ static void mid_desc_register(void* page, int class_idx, uint64_t owner_tid) {
|
||||
void* canonical_page = (void*)((uintptr_t)page & ~((uintptr_t)POOL_PAGE_SIZE - 1));
|
||||
uint32_t h = mid_desc_hash(canonical_page);
|
||||
pthread_mutex_lock(&g_mid_desc_mu[h]);
|
||||
|
||||
// Check if descriptor already exists
|
||||
MidPageDesc* existing = g_mid_desc_head[h];
|
||||
while (existing) {
|
||||
if (existing->page == canonical_page) {
|
||||
// Descriptor already exists, update owner_tid if needed
|
||||
if (existing->owner_tid == 0 && owner_tid != 0) {
|
||||
existing->owner_tid = owner_tid;
|
||||
}
|
||||
pthread_mutex_unlock(&g_mid_desc_mu[h]);
|
||||
return;
|
||||
}
|
||||
existing = existing->next;
|
||||
}
|
||||
|
||||
// Descriptor doesn't exist, create new one
|
||||
MidPageDesc* d = (MidPageDesc*)hkm_libc_malloc(sizeof(MidPageDesc)); // P0 Fix: Use libc malloc
|
||||
if (d) {
|
||||
d->page = canonical_page;
|
||||
@ -76,7 +92,16 @@ static void mid_desc_adopt(void* addr, int class_idx, uint64_t owner_tid) {
|
||||
if (d->owner_tid == 0) d->owner_tid = owner_tid;
|
||||
} else {
|
||||
MidPageDesc* nd = (MidPageDesc*)hkm_libc_malloc(sizeof(MidPageDesc)); // P0 Fix: Use libc malloc
|
||||
if (nd) { nd->page = page; nd->class_idx = (uint8_t)class_idx; nd->owner_tid = owner_tid; nd->next = g_mid_desc_head[h]; g_mid_desc_head[h] = nd; }
|
||||
if (nd) {
|
||||
nd->page = page;
|
||||
nd->class_idx = (uint8_t)class_idx;
|
||||
nd->owner_tid = owner_tid;
|
||||
nd->next = g_mid_desc_head[h];
|
||||
atomic_store(&nd->in_use, 0);
|
||||
nd->blocks_per_page = 0;
|
||||
atomic_store(&nd->pending_dn, 0);
|
||||
g_mid_desc_head[h] = nd;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&g_mid_desc_mu[h]);
|
||||
}
|
||||
|
||||
80
core/box/smallobject_cold_iface_v1.h
Normal file
@ -0,0 +1,80 @@
|
||||
// smallobject_cold_iface_v1.h - Cold interface wrapper for SmallObject HotBox v3
|
||||
// Role:
// - Collect the boundary between the SmallObject Hot Box (v3) and the existing v1 Tiny Cold layer in one place.
// - Phase A: wrap only the C7 refill/retire paths onto the v1 TinyHeap.
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include "tiny_heap_box.h"
|
||||
#include "smallobject_hotbox_v3_box.h"
|
||||
#include "../hakmem_tiny.h" // TINY_SLAB_SIZE for slab base mask
|
||||
|
||||
struct so_page_v3;
|
||||
|
||||
typedef struct SmallObjectColdIface {
|
||||
struct so_page_v3* (*refill_page)(void* cold_ctx, uint32_t class_idx);
|
||||
void (*retire_page)(void* cold_ctx, uint32_t class_idx, struct so_page_v3* page);
|
||||
} SmallObjectColdIface;
|
||||
|
||||
static inline struct so_page_v3* smallobject_cold_refill_page_v1(void* cold_ctx, uint32_t class_idx) {
|
||||
if (class_idx != 7 && class_idx != 6) {
|
||||
return NULL; // Phase A-2: only C7/C6 are supported
|
||||
}
|
||||
tiny_heap_ctx_t* ctx = cold_ctx ? (tiny_heap_ctx_t*)cold_ctx : tiny_heap_ctx_for_thread();
|
||||
if (!ctx) return NULL;
|
||||
tiny_heap_page_t* lease = tiny_heap_prepare_page(ctx, (int)class_idx);
|
||||
if (!lease) return NULL;
|
||||
|
||||
so_page_v3* page = (so_page_v3*)calloc(1, sizeof(so_page_v3));
|
||||
if (!page) return NULL;
|
||||
|
||||
page->lease_page = lease;
|
||||
page->meta = lease->meta;
|
||||
page->ss = lease->ss;
|
||||
page->slab_idx = lease->slab_idx;
|
||||
page->base = lease->base;
|
||||
page->capacity = lease->capacity;
|
||||
page->block_size = (uint32_t)tiny_stride_for_class((int)class_idx);
|
||||
page->class_idx = class_idx;
|
||||
page->slab_ref = lease;
|
||||
return page;
|
||||
}
|
||||
|
||||
static inline void smallobject_cold_retire_page_v1(void* cold_ctx, uint32_t class_idx, struct so_page_v3* page) {
|
||||
if (!page || (class_idx != 7 && class_idx != 6)) {
|
||||
if (page) {
|
||||
free(page);
|
||||
}
|
||||
return;
|
||||
}
|
||||
tiny_heap_ctx_t* ctx = cold_ctx ? (tiny_heap_ctx_t*)cold_ctx : tiny_heap_ctx_for_thread();
|
||||
if (!ctx) {
|
||||
free(page);
|
||||
return;
|
||||
}
|
||||
tiny_heap_page_t* lease = page->lease_page;
|
||||
if (!lease) {
|
||||
free(page);
|
||||
return;
|
||||
}
|
||||
|
||||
lease->base = (uint8_t*)page->base;
|
||||
lease->capacity = (uint16_t)page->capacity;
|
||||
lease->used = (uint16_t)page->used;
|
||||
lease->meta = page->meta;
|
||||
lease->ss = page->ss;
|
||||
lease->slab_idx = page->slab_idx;
|
||||
lease->free_list = page->freelist;
|
||||
|
||||
tiny_heap_page_becomes_empty(ctx, (int)class_idx, lease);
|
||||
free(page);
|
||||
}
|
||||
|
||||
static inline SmallObjectColdIface smallobject_cold_iface_v1(void) {
|
||||
SmallObjectColdIface iface = {
|
||||
.refill_page = smallobject_cold_refill_page_v1,
|
||||
.retire_page = smallobject_cold_retire_page_v1,
|
||||
};
|
||||
return iface;
|
||||
}
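
Note (not part of the diff): the boundary is intended to be consumed through the struct, not the _v1 functions directly, so a hot box stays decoupled from TinyHeap. A sketch, assuming class 7 and a NULL cold_ctx (which the wrapper resolves to the thread-local TinyHeap ctx):

// Sketch only: lease a C7 page through the cold interface and hand it back.
static void cold_iface_roundtrip_sketch(void) {
    SmallObjectColdIface cold = smallobject_cold_iface_v1();
    struct so_page_v3* page = cold.refill_page(NULL, 7); // NULL ctx -> tiny_heap_ctx_for_thread()
    if (!page) return;                                   // refill failed; caller stays on v1
    // ... carve blocks from page->base / page->freelist ...
    cold.retire_page(NULL, 7, page);                     // write counters back and free the wrapper
}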
|
||||
74
core/box/smallobject_hotbox_v3_box.h
Normal file
@ -0,0 +1,74 @@
|
||||
// smallobject_hotbox_v3_box.h - SmallObject HotHeap v3 (C7-first skeleton)
|
||||
//
|
||||
// Phase A/B: provide the types plus TLS/stats and a frame the front can call into.
// The body is still a v1 fallback (so_alloc returns NULL).
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdatomic.h>
|
||||
#include "tiny_geometry_box.h"
|
||||
#include "smallobject_hotbox_v3_env_box.h"
|
||||
#include "tiny_region_id.h"
|
||||
|
||||
#ifndef SMALLOBJECT_NUM_CLASSES
|
||||
#define SMALLOBJECT_NUM_CLASSES TINY_NUM_CLASSES
|
||||
#endif
|
||||
|
||||
struct tiny_heap_page_t;
|
||||
struct TinySlabMeta;
|
||||
struct SuperSlab;
|
||||
|
||||
typedef struct so_page_v3 {
|
||||
void* freelist;
|
||||
uint32_t used;
|
||||
uint32_t capacity;
|
||||
uint32_t block_size;
|
||||
uint32_t class_idx;
|
||||
uint32_t flags;
|
||||
void* base; // start of the user region after carving
void* slab_base; // 64KiB slab base (the page_of header is written here)
|
||||
struct TinySlabMeta* meta;
|
||||
struct SuperSlab* ss;
|
||||
uint16_t slab_idx;
|
||||
struct tiny_heap_page_t* lease_page;
|
||||
void* slab_ref; // kept as a generic token; currently same as lease_page for v1
|
||||
struct so_page_v3* next;
|
||||
} so_page_v3;
|
||||
|
||||
typedef struct so_class_v3 {
|
||||
so_page_v3* current;
|
||||
so_page_v3* partial;
|
||||
uint16_t max_partial_pages;
|
||||
uint16_t partial_count;
|
||||
uint32_t block_size;
|
||||
} so_class_v3;
|
||||
|
||||
typedef struct so_ctx_v3 {
|
||||
so_class_v3 cls[SMALLOBJECT_NUM_CLASSES];
|
||||
} so_ctx_v3;
|
||||
|
||||
typedef struct so_stats_class_v3 {
|
||||
_Atomic uint64_t route_hits;
|
||||
_Atomic uint64_t alloc_calls;
|
||||
_Atomic uint64_t alloc_refill;
|
||||
_Atomic uint64_t alloc_fallback_v1;
|
||||
_Atomic uint64_t free_calls;
|
||||
_Atomic uint64_t free_fallback_v1;
|
||||
} so_stats_class_v3;
|
||||
|
||||
// Stats helpers (defined in core/smallobject_hotbox_v3.c)
|
||||
int so_v3_stats_enabled(void);
|
||||
void so_v3_record_route_hit(uint8_t ci);
|
||||
void so_v3_record_alloc_call(uint8_t ci);
|
||||
void so_v3_record_alloc_refill(uint8_t ci);
|
||||
void so_v3_record_alloc_fallback(uint8_t ci);
|
||||
void so_v3_record_free_call(uint8_t ci);
|
||||
void so_v3_record_free_fallback(uint8_t ci);
|
||||
|
||||
// TLS accessor (core/smallobject_hotbox_v3.c)
|
||||
so_ctx_v3* so_tls_get(void);
|
||||
|
||||
// Hot path API (Phase B: stub → always fallback to v1)
|
||||
void* so_alloc(uint32_t class_idx);
|
||||
void so_free(uint32_t class_idx, void* ptr);
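
Note (not part of the diff): in Phase B so_alloc() is still a stub, so the front is expected to treat NULL as "fall back to v1" and only count the event. A sketch of that contract using the helpers declared above:

// Sketch only: front-side handling of the Phase B stub.
static void* so_alloc_or_fallback_sketch(uint32_t class_idx) {
    so_v3_record_alloc_call((uint8_t)class_idx);
    void* p = so_alloc(class_idx);                 // Phase B: returns NULL
    if (p) return p;
    so_v3_record_alloc_fallback((uint8_t)class_idx);
    return NULL;                                   // caller continues on the v1 path
}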
|
||||
47
core/box/smallobject_hotbox_v3_env_box.h
Normal file
@ -0,0 +1,47 @@
|
||||
// smallobject_hotbox_v3_env_box.h - ENV gate for SmallObject HotHeap v3
|
||||
// Role:
// - Read HAKMEM_SMALL_HEAP_V3_ENABLED / HAKMEM_SMALL_HEAP_V3_CLASSES in one place.
// - Default is C7-only ON (class mask 0x80); v3 is disabled only when the ENV explicitly sets 0.
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "../hakmem_tiny_config.h"
|
||||
|
||||
static inline int small_heap_v3_enabled(void) {
|
||||
static int g_enable = -1;
|
||||
if (__builtin_expect(g_enable == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_SMALL_HEAP_V3_ENABLED");
|
||||
if (e && *e) {
|
||||
g_enable = (*e != '0') ? 1 : 0;
|
||||
} else {
|
||||
// Default is ON (enabled when the ENV variable is unset)
|
||||
g_enable = 1;
|
||||
}
|
||||
}
|
||||
return g_enable;
|
||||
}
|
||||
|
||||
static inline int small_heap_v3_class_enabled(uint8_t class_idx) {
|
||||
static int g_parsed = 0;
|
||||
static unsigned g_mask = 0;
|
||||
if (__builtin_expect(!g_parsed, 0)) {
|
||||
const char* e = getenv("HAKMEM_SMALL_HEAP_V3_CLASSES");
|
||||
if (e && *e) {
|
||||
unsigned v = (unsigned)strtoul(e, NULL, 0);
|
||||
g_mask = v & 0xFFu;
|
||||
} else {
|
||||
// Default: only C7 is routed to v3
|
||||
g_mask = 0x80u;
|
||||
}
|
||||
g_parsed = 1;
|
||||
}
|
||||
if (!small_heap_v3_enabled()) return 0;
|
||||
if (class_idx >= TINY_NUM_CLASSES) return 0;
|
||||
return (g_mask & (1u << class_idx)) != 0;
|
||||
}
|
||||
|
||||
static inline int small_heap_v3_c7_enabled(void) {
|
||||
return small_heap_v3_class_enabled(7);
|
||||
}
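
Note (not part of the diff): with both ENV variables unset the gate resolves to "v3 ON, class mask 0x80", so only class 7 is routed to v3. A tiny check mirroring the mask logic above:

// Sketch only: default mask behaviour (no ENV set).
static void small_heap_v3_default_sketch(void) {
    int c7 = small_heap_v3_class_enabled(7); // (0x80 & (1u << 7)) != 0 -> 1
    int c0 = small_heap_v3_class_enabled(0); // (0x80 & (1u << 0)) != 0 -> 0
    (void)c7; (void)c0;
}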
|
||||
@ -360,7 +360,7 @@ void superslab_free(SuperSlab* ss) {
|
||||
}
|
||||
if (lazy_zero_enabled) {
|
||||
#ifdef MADV_DONTNEED
|
||||
-        (void)madvise((void*)ss, ss_size, MADV_DONTNEED);
+        (void)ss_os_madvise_guarded((void*)ss, ss_size, MADV_DONTNEED, "ss_lru_madvise");
|
||||
ss_os_stats_record_madvise();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
// ss_os_acquire_box.c - SuperSlab OS Memory Acquisition Box Implementation
|
||||
#include "ss_os_acquire_box.h"
|
||||
#include "../hakmem_build_flags.h"
|
||||
#include "../hakmem_env_cache.h"
|
||||
#include <sys/mman.h>
|
||||
#include <sys/resource.h>
|
||||
#include <errno.h>
|
||||
@ -15,8 +16,11 @@ extern _Atomic uint64_t g_final_fallback_mmap_count;
|
||||
extern _Atomic uint64_t g_ss_os_alloc_calls;
|
||||
extern _Atomic uint64_t g_ss_os_free_calls;
|
||||
extern _Atomic uint64_t g_ss_os_madvise_calls;
|
||||
extern _Atomic uint64_t g_ss_os_madvise_fail_enomem;
|
||||
extern _Atomic uint64_t g_ss_os_madvise_fail_other;
|
||||
extern _Atomic uint64_t g_ss_os_huge_alloc_calls;
|
||||
extern _Atomic uint64_t g_ss_os_huge_fail_calls;
|
||||
extern _Atomic bool g_ss_madvise_disabled;
|
||||
|
||||
// ============================================================================
|
||||
// OOM Diagnostics
|
||||
@ -240,9 +244,12 @@ void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int p
|
||||
// See: EXPLICIT_PREFAULT_IMPLEMENTATION_REPORT_20251205.md
|
||||
#ifdef MADV_POPULATE_WRITE
|
||||
if (populate) {
|
||||
-        int ret = madvise(ptr, ss_size, MADV_POPULATE_WRITE);
-        ss_os_stats_record_madvise();
+        int ret = ss_os_madvise_guarded(ptr, ss_size, MADV_POPULATE_WRITE, "ss_os_acquire_populate");
         if (ret != 0) {
+            if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
+                fprintf(stderr, "[SS_OS] madvise(MADV_POPULATE_WRITE) EINVAL (strict mode). Aborting.\n");
+                abort();
+            }
|
||||
// Fallback for kernels that support MADV_POPULATE_WRITE but it fails
|
||||
// Use explicit page-by-page touching with writes
|
||||
volatile char* p = (volatile char*)ptr;
|
||||
@ -273,10 +280,14 @@ static void ss_os_stats_destructor(void) {
|
||||
return;
|
||||
}
|
||||
fprintf(stderr,
|
||||
"[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
|
||||
"[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu madvise_enomem=%llu madvise_other=%llu madvise_disabled=%d "
|
||||
"mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_os_alloc_calls, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_os_free_calls, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_calls, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_enomem, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_other, memory_order_relaxed),
|
||||
atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed) ? 1 : 0,
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_mmap_count, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_final_fallback_mmap_count, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_ss_os_huge_alloc_calls, memory_order_relaxed),
|
||||
|
||||
@ -18,7 +18,11 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// ============================================================================
|
||||
// Global Counters (for debugging/diagnostics)
|
||||
@ -29,8 +33,11 @@ extern _Atomic uint64_t g_final_fallback_mmap_count;
|
||||
extern _Atomic uint64_t g_ss_os_alloc_calls;
|
||||
extern _Atomic uint64_t g_ss_os_free_calls;
|
||||
extern _Atomic uint64_t g_ss_os_madvise_calls;
|
||||
extern _Atomic uint64_t g_ss_os_madvise_fail_enomem;
|
||||
extern _Atomic uint64_t g_ss_os_madvise_fail_other;
|
||||
extern _Atomic uint64_t g_ss_os_huge_alloc_calls;
|
||||
extern _Atomic uint64_t g_ss_os_huge_fail_calls;
|
||||
extern _Atomic bool g_ss_madvise_disabled;
|
||||
|
||||
static inline int ss_os_stats_enabled(void) {
|
||||
static int g_ss_os_stats_enabled = -1;
|
||||
@ -62,6 +69,52 @@ static inline void ss_os_stats_record_madvise(void) {
|
||||
atomic_fetch_add_explicit(&g_ss_os_madvise_calls, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// madvise guard (shared by Superslab hot/cold paths)
|
||||
// ============================================================================
|
||||
//
|
||||
static inline int ss_os_madvise_guarded(void* ptr, size_t len, int advice, const char* where) {
|
||||
(void)where;
|
||||
if (!ptr || len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ret = madvise(ptr, len, advice);
|
||||
ss_os_stats_record_madvise();
|
||||
if (ret == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int e = errno;
|
||||
if (e == ENOMEM) {
|
||||
atomic_fetch_add_explicit(&g_ss_os_madvise_fail_enomem, 1, memory_order_relaxed);
|
||||
atomic_store_explicit(&g_ss_madvise_disabled, true, memory_order_relaxed);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static _Atomic bool g_ss_madvise_enomem_logged = false;
|
||||
bool already = atomic_exchange_explicit(&g_ss_madvise_enomem_logged, true, memory_order_relaxed);
|
||||
if (!already) {
|
||||
fprintf(stderr,
|
||||
"[SS_OS_MADVISE] madvise(advice=%d, ptr=%p, len=%zu) failed with ENOMEM "
|
||||
"(vm.max_map_count reached?). Disabling further madvise calls.\n",
|
||||
advice, ptr, len);
|
||||
}
|
||||
#endif
|
||||
return 0; // soft fail, do not propagate ENOMEM
|
||||
}
|
||||
|
||||
atomic_fetch_add_explicit(&g_ss_os_madvise_fail_other, 1, memory_order_relaxed);
|
||||
if (e == EINVAL) {
|
||||
errno = e;
|
||||
return -1; // let caller decide (strict mode)
|
||||
}
|
||||
errno = e;
|
||||
return 0;
|
||||
}
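
Note (not part of the diff): the contract of ss_os_madvise_guarded() is "ENOMEM is swallowed and disables further madvise, EINVAL is surfaced as -1 so strict mode can abort, everything else is a soft 0". The call sites changed in this commit follow the pattern below (sketch, using the HAK_ENV_SS_MADVISE_STRICT cache also added in this commit).

// Sketch only: typical call-site pattern after this change.
static void release_range_sketch(void* ptr, size_t len) {
    int ret = ss_os_madvise_guarded(ptr, len, MADV_DONTNEED, "example_site");
    if (ret != 0 && HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
        fprintf(stderr, "[example] madvise EINVAL in strict mode\n");
        abort();
    }
}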
|
||||
|
||||
// ============================================================================
|
||||
// HugePage Experiment (research-only)
|
||||
// ============================================================================
|
||||
|
||||
37
core/box/tiny_cold_iface_v1.h
Normal file
@ -0,0 +1,37 @@
|
||||
// tiny_cold_iface_v1.h
|
||||
// Common boundary (v1 wrapper) that lets other Hot Boxes such as TinyHotHeap v2 talk to Superslab/Tier/Stats.
// Precondition: tiny_heap_page_t / tiny_heap_ctx_t are already defined in tiny_heap_box.h.
|
||||
#pragma once
|
||||
|
||||
#include "tiny_heap_box.h"
|
||||
|
||||
typedef struct TinyColdIface {
|
||||
tiny_heap_page_t* (*refill_page)(void* cold_ctx, uint32_t class_idx);
|
||||
void (*retire_page)(void* cold_ctx, uint32_t class_idx, tiny_heap_page_t* page);
|
||||
} TinyColdIface;
|
||||
|
||||
// Forward declarations for the v1 cold helpers (defined in tiny_heap_box.h)
|
||||
tiny_heap_page_t* tiny_heap_prepare_page(tiny_heap_ctx_t* ctx, int class_idx);
|
||||
void tiny_heap_page_becomes_empty(tiny_heap_ctx_t* ctx, int class_idx, tiny_heap_page_t* page);
|
||||
|
||||
static inline tiny_heap_page_t* tiny_cold_refill_page_v1(void* cold_ctx, uint32_t class_idx) {
|
||||
if (!cold_ctx) {
|
||||
return NULL;
|
||||
}
|
||||
return tiny_heap_prepare_page((tiny_heap_ctx_t*)cold_ctx, (int)class_idx);
|
||||
}
|
||||
|
||||
static inline void tiny_cold_retire_page_v1(void* cold_ctx, uint32_t class_idx, tiny_heap_page_t* page) {
|
||||
if (!cold_ctx || !page) {
|
||||
return;
|
||||
}
|
||||
tiny_heap_page_becomes_empty((tiny_heap_ctx_t*)cold_ctx, (int)class_idx, page);
|
||||
}
|
||||
|
||||
static inline TinyColdIface tiny_cold_iface_v1(void) {
|
||||
TinyColdIface iface = {
|
||||
.refill_page = tiny_cold_refill_page_v1,
|
||||
.retire_page = tiny_cold_retire_page_v1,
|
||||
};
|
||||
return iface;
|
||||
}
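
Note (not part of the diff): a hot box is expected to hold a TinyColdIface plus an opaque cold_ctx and never call TinyHeap symbols directly. A sketch, assuming cold_ctx is a tiny_heap_ctx_t* as in the v1 wrapper above:

// Sketch only: refill/retire through the interface instead of TinyHeap symbols.
static void hotbox_refill_sketch(void* cold_ctx, uint32_t class_idx) {
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_page_t* page = cold.refill_page(cold_ctx, class_idx);
    if (!page) return;                            // cold layer could not provide a page
    // ... serve allocations from the page ...
    cold.retire_page(cold_ctx, class_idx, page);  // hand the empty page back to v1
}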
|
||||
101
core/box/tiny_front_v3_env_box.h
Normal file
@ -0,0 +1,101 @@
|
||||
// tiny_front_v3_env_box.h - Tiny Front v3 ENV gate & snapshot (guard/UC/header)
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct TinyFrontV3Snapshot {
|
||||
bool unified_cache_on;
|
||||
bool tiny_guard_on;
|
||||
uint8_t header_mode; // cached value of tiny_header_mode()
|
||||
bool header_v3_enabled; // ENV: HAKMEM_TINY_HEADER_V3_ENABLED
|
||||
bool header_v3_skip_c7; // ENV: HAKMEM_TINY_HEADER_V3_SKIP_C7
|
||||
} TinyFrontV3Snapshot;
|
||||
|
||||
// Size→class/route entry for the Tiny front v3 LUT (route_kind is expected to be a tiny_route_kind_t)
|
||||
typedef struct TinyFrontV3SizeClassEntry {
|
||||
uint8_t class_idx;
|
||||
uint8_t route_kind;
|
||||
} TinyFrontV3SizeClassEntry;
|
||||
|
||||
#define TINY_FRONT_V3_INVALID_CLASS ((uint8_t)0xFF)
|
||||
|
||||
extern TinyFrontV3Snapshot g_tiny_front_v3_snapshot;
|
||||
extern int g_tiny_front_v3_snapshot_ready;
|
||||
|
||||
// ENV gate: default OFF
|
||||
static inline bool tiny_front_v3_enabled(void) {
|
||||
static int g_enable = -1;
|
||||
if (__builtin_expect(g_enable == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_FRONT_V3_ENABLED");
|
||||
g_enable = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g_enable != 0;
|
||||
}
|
||||
|
||||
// Optional: size→class LUT gate (default OFF, for A/B)
|
||||
static inline bool tiny_front_v3_lut_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_FRONT_V3_LUT_ENABLED");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g != 0;
|
||||
}
|
||||
|
||||
// Optional: route fast path (Tiny LUT→1 switch). Default OFF for easy rollback.
|
||||
static inline bool tiny_front_v3_route_fast_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_FRONT_V3_ROUTE_FAST_ENABLED");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g != 0;
|
||||
}
|
||||
|
||||
// Optional stats gate
|
||||
static inline bool tiny_front_v3_stats_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_FRONT_V3_STATS");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g != 0;
|
||||
}
|
||||
|
||||
// Header v3 experimental gate (default OFF)
|
||||
static inline bool tiny_header_v3_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_HEADER_V3_ENABLED");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g != 0;
|
||||
}
|
||||
|
||||
// Skip header write for C7 v3 allocs (bench/experiment, default OFF)
|
||||
static inline bool tiny_header_v3_skip_c7(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_HEADER_V3_SKIP_C7");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g != 0;
|
||||
}
|
||||
|
||||
// Snapshot initializer (implemented in hakmem_tiny.c)
|
||||
void tiny_front_v3_snapshot_init(void);
|
||||
|
||||
// LUT initializer / lookup (implemented in hakmem_tiny.c)
|
||||
void tiny_front_v3_size_class_lut_init(void);
|
||||
const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size);
|
||||
|
||||
// Get cached snapshot (lazy init)
|
||||
static inline const TinyFrontV3Snapshot* tiny_front_v3_snapshot_get(void) {
|
||||
if (__builtin_expect(!g_tiny_front_v3_snapshot_ready, 0)) {
|
||||
tiny_front_v3_snapshot_init();
|
||||
}
|
||||
return &g_tiny_front_v3_snapshot;
|
||||
}
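
Note (not part of the diff): the snapshot is meant to be read at most once per call and only when the v3 front gate is ON, which is exactly how the front code later in this commit uses it. Minimal consumption sketch:

// Sketch only: gate-then-snapshot pattern used by malloc_tiny_fast/free_tiny_fast.
static int unified_cache_allowed_sketch(void) {
    const TinyFrontV3Snapshot* snap =
        tiny_front_v3_enabled() ? tiny_front_v3_snapshot_get() : NULL;
    // With the v3 front OFF there is no snapshot and the legacy unified cache stays on.
    return (!snap || snap->unified_cache_on) ? 1 : 0;
}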
|
||||
@ -36,7 +36,8 @@ typedef struct tiny_hotheap_class_v2 {
|
||||
tiny_hotheap_page_v2* partial_pages;
|
||||
tiny_hotheap_page_v2* full_pages;
|
||||
uint16_t stride;
|
||||
uint16_t _pad;
|
||||
uint16_t max_partial_pages; // upper bound on retained empty pages (1-2 expected for the C7-only case)
uint16_t partial_count; // number of partial pages currently held
tiny_hotheap_page_v2 storage_page; // hold just one C7 page for now (Phase36: reuse when empty)
|
||||
} tiny_hotheap_class_v2;
|
||||
|
||||
@ -51,8 +52,8 @@ extern __thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2;
|
||||
tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void);
|
||||
void* tiny_hotheap_v2_alloc(uint8_t class_idx);
|
||||
void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta);
|
||||
-void tiny_hotheap_v2_record_route_fallback(void);
-void tiny_hotheap_v2_record_free_fallback(void);
+void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx);
+void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx);
|
||||
|
||||
typedef struct tiny_hotheap_v2_stats_snapshot {
|
||||
uint64_t route_hits;
|
||||
@ -65,11 +66,19 @@ typedef struct tiny_hotheap_v2_stats_snapshot {
|
||||
uint64_t free_calls;
|
||||
uint64_t free_fast;
|
||||
uint64_t free_fallback_v1;
|
||||
uint64_t cold_refill_fail;
|
||||
uint64_t cold_retire_calls;
|
||||
uint64_t retire_calls_v2;
|
||||
uint64_t prepare_calls;
|
||||
uint64_t prepare_with_current_null;
|
||||
uint64_t prepare_from_partial;
|
||||
uint64_t free_made_current;
|
||||
uint64_t page_retired;
|
||||
uint64_t partial_pushes;
|
||||
uint64_t partial_pops;
|
||||
uint64_t partial_peak;
|
||||
uint64_t refill_with_current;
|
||||
uint64_t refill_with_partial;
|
||||
} tiny_hotheap_v2_stats_snapshot_t;
|
||||
|
||||
void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out);
|
||||
|
||||
@ -9,10 +9,13 @@
|
||||
#include "../hakmem_tiny_config.h"
|
||||
#include "tiny_heap_env_box.h"
|
||||
|
||||
#include "smallobject_hotbox_v3_env_box.h"
|
||||
|
||||
typedef enum {
  TINY_ROUTE_LEGACY = 0,
-  TINY_ROUTE_HEAP = 1, // TinyHeap v1
-  TINY_ROUTE_HOTHEAP_V2 = 2, // TinyHotHeap v2
+  TINY_ROUTE_HEAP = 1, // TinyHeap v1
+  TINY_ROUTE_HOTHEAP_V2 = 2, // TinyHotHeap v2
+  TINY_ROUTE_SMALL_HEAP_V3 = 3, // SmallObject HotHeap v3 (C7-first, research box)
} tiny_route_kind_t;
|
||||
|
||||
extern tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES];
|
||||
@ -20,7 +23,9 @@ extern int g_tiny_route_snapshot_done;
|
||||
|
||||
static inline void tiny_route_snapshot_init(void) {
|
||||
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
||||
-    if (tiny_hotheap_v2_class_enabled((uint8_t)i)) {
+    if (small_heap_v3_class_enabled((uint8_t)i)) {
+      g_tiny_route_class[i] = TINY_ROUTE_SMALL_HEAP_V3;
+    } else if (tiny_hotheap_v2_class_enabled((uint8_t)i)) {
|
||||
g_tiny_route_class[i] = TINY_ROUTE_HOTHEAP_V2;
|
||||
} else if (tiny_heap_box_enabled() && tiny_heap_class_route_enabled(i)) {
|
||||
g_tiny_route_class[i] = TINY_ROUTE_HEAP;
|
||||
@ -42,7 +47,7 @@ static inline tiny_route_kind_t tiny_route_for_class(uint8_t ci) {
|
||||
}
|
||||
|
||||
static inline int tiny_route_is_heap_kind(tiny_route_kind_t route) {
|
||||
-  return route == TINY_ROUTE_HEAP || route == TINY_ROUTE_HOTHEAP_V2;
+  return route == TINY_ROUTE_HEAP || route == TINY_ROUTE_HOTHEAP_V2 || route == TINY_ROUTE_SMALL_HEAP_V3;
|
||||
}
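
Note (not part of the diff): with the new priority (v3 > HotHeap v2 > TinyHeap v1 > legacy) and the default ENV values of this commit, only C7 lands on TINY_ROUTE_SMALL_HEAP_V3; the other classes keep their previous routes. A sketch of checking that:

// Sketch only: inspect the route chosen for C7 after snapshot init.
static void route_default_sketch(void) {
    tiny_route_snapshot_init();
    tiny_route_kind_t r = tiny_route_for_class(7);
    // Expected with HAKMEM_SMALL_HEAP_V3_* unset: r == TINY_ROUTE_SMALL_HEAP_V3.
    (void)r;
}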
|
||||
|
||||
// Does the C7 front use TinyHeap? (decided via the route snapshot)
|
||||
|
||||
@ -40,6 +40,8 @@
|
||||
#include "../box/tiny_c7_hotbox.h" // Optional: C7 専用ホットボックス
|
||||
#include "../box/tiny_heap_box.h" // TinyHeap 汎用 Box
|
||||
#include "../box/tiny_hotheap_v2_box.h" // TinyHotHeap v2 (Phase31 A/B)
|
||||
#include "../box/smallobject_hotbox_v3_box.h" // SmallObject HotHeap v3 skeleton
|
||||
#include "../box/tiny_front_v3_env_box.h" // Tiny front v3 snapshot gate
|
||||
#include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B)
|
||||
#include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy)
|
||||
#include "../box/tiny_front_stats_box.h" // Front class distribution counters
|
||||
@ -102,24 +104,58 @@ static inline int front_gate_unified_enabled(void) {
|
||||
//
|
||||
__attribute__((always_inline))
|
||||
static inline void* malloc_tiny_fast(size_t size) {
|
||||
// decide size → class_idx only once
|
||||
int class_idx = hak_tiny_size_to_class(size);
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
return NULL;
|
||||
const int front_v3_on = tiny_front_v3_enabled();
|
||||
const TinyFrontV3Snapshot* front_snap =
|
||||
__builtin_expect(front_v3_on, 0) ? tiny_front_v3_snapshot_get() : NULL;
|
||||
const bool route_fast_on = front_v3_on && tiny_front_v3_lut_enabled() &&
|
||||
tiny_front_v3_route_fast_enabled();
|
||||
|
||||
int class_idx = -1;
|
||||
tiny_route_kind_t route = TINY_ROUTE_LEGACY;
|
||||
bool route_trusted = false;
|
||||
|
||||
if (front_v3_on && tiny_front_v3_lut_enabled()) {
|
||||
const TinyFrontV3SizeClassEntry* e = tiny_front_v3_lut_lookup(size);
|
||||
if (e && e->class_idx != TINY_FRONT_V3_INVALID_CLASS) {
|
||||
class_idx = (int)e->class_idx;
|
||||
route = (tiny_route_kind_t)e->route_kind;
|
||||
route_trusted = route_fast_on;
|
||||
}
|
||||
}
|
||||
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
class_idx = hak_tiny_size_to_class(size);
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
return NULL;
|
||||
}
|
||||
route = tiny_route_for_class((uint8_t)class_idx);
|
||||
route_trusted = false;
|
||||
} else if (!route_trusted &&
|
||||
route != TINY_ROUTE_LEGACY && route != TINY_ROUTE_HEAP &&
|
||||
route != TINY_ROUTE_HOTHEAP_V2 && route != TINY_ROUTE_SMALL_HEAP_V3) {
|
||||
route = tiny_route_for_class((uint8_t)class_idx);
|
||||
}
|
||||
|
||||
tiny_front_alloc_stat_inc(class_idx);
|
||||
|
||||
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
|
||||
switch (route) {
|
||||
case TINY_ROUTE_HOTHEAP_V2: {
|
||||
if (class_idx == 7) {
|
||||
void* v2p = tiny_hotheap_v2_alloc(7);
|
||||
if (TINY_HOT_LIKELY(v2p != NULL)) {
|
||||
return v2p;
|
||||
}
|
||||
tiny_hotheap_v2_record_route_fallback();
|
||||
case TINY_ROUTE_SMALL_HEAP_V3: {
|
||||
void* v3p = so_alloc((uint32_t)class_idx);
|
||||
if (TINY_HOT_LIKELY(v3p != NULL)) {
|
||||
return v3p;
|
||||
}
|
||||
so_v3_record_alloc_fallback((uint8_t)class_idx);
|
||||
// fallthrough to v2/v1
|
||||
__attribute__((fallthrough));
|
||||
}
|
||||
case TINY_ROUTE_HOTHEAP_V2: {
|
||||
void* v2p = tiny_hotheap_v2_alloc((uint8_t)class_idx);
|
||||
if (TINY_HOT_LIKELY(v2p != NULL)) {
|
||||
return v2p;
|
||||
}
|
||||
tiny_hotheap_v2_record_route_fallback((uint8_t)class_idx);
|
||||
// fallthrough to TinyHeap v1
|
||||
__attribute__((fallthrough));
|
||||
}
|
||||
case TINY_ROUTE_HEAP: {
|
||||
void* heap_ptr = NULL;
|
||||
@ -139,7 +175,10 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
}
|
||||
|
||||
// Legacy Tiny front
|
||||
-    void* ptr = tiny_hot_alloc_fast(class_idx);
+    void* ptr = NULL;
+    if (!front_snap || front_snap->unified_cache_on) {
+      ptr = tiny_hot_alloc_fast(class_idx);
+    }
|
||||
if (TINY_HOT_LIKELY(ptr != NULL)) {
|
||||
return ptr;
|
||||
}
|
||||
@ -192,6 +231,8 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
tiny_front_free_stat_inc(class_idx);
|
||||
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
|
||||
const int use_tiny_heap = tiny_route_is_heap_kind(route);
|
||||
const TinyFrontV3Snapshot* front_snap =
|
||||
__builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
|
||||
|
||||
// TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
|
||||
// In Release builds, we trust header magic (0xA0) as sufficient validation.
|
||||
@ -255,6 +296,9 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
// Same-thread + TinyHeap route → route-based free
|
||||
if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
switch (route) {
|
||||
case TINY_ROUTE_SMALL_HEAP_V3:
|
||||
so_free((uint32_t)class_idx, base);
|
||||
return 1;
|
||||
case TINY_ROUTE_HOTHEAP_V2:
|
||||
tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
|
||||
return 1;
|
||||
@ -276,7 +320,9 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
if (use_tiny_heap) {
|
||||
// fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
|
||||
     if (route == TINY_ROUTE_HOTHEAP_V2) {
-      tiny_hotheap_v2_record_free_fallback();
+      tiny_hotheap_v2_record_free_fallback((uint8_t)class_idx);
|
||||
} else if (route == TINY_ROUTE_SMALL_HEAP_V3) {
|
||||
so_v3_record_free_fallback((uint8_t)class_idx);
|
||||
}
|
||||
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
|
||||
return 1;
|
||||
@ -300,7 +346,10 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
}
|
||||
#endif
|
||||
|
||||
-  int pushed = unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base));
+  int pushed = 0;
+  if (!front_snap || front_snap->unified_cache_on) {
+    pushed = unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base));
+  }
|
||||
if (__builtin_expect(pushed, 1)) {
|
||||
return 1; // Success
|
||||
}
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
#include "hakmem_sys.h" // Phase 6.11.1: Syscall wrappers with timing
|
||||
#include "hakmem_whale.h" // Phase 6.11.1: Whale fast-path cache
|
||||
#include "hakmem_env_cache.h" // Priority-2: ENV cache
|
||||
#include "box/ss_os_acquire_box.h" // madvise guard
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
@ -117,12 +118,17 @@ void hak_batch_flush(void) {
|
||||
size_t size = snap.sizes[i];
|
||||
|
||||
// Step 1: MADV_FREE to release physical pages (fast, low TLB cost)
|
||||
-        int ret = madvise(ptr, size, MADV_FREE);
+        int ret = ss_os_madvise_guarded(ptr, size, MADV_FREE, "batch_free");
         if (ret != 0) {
+            if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
+                fprintf(stderr, "[Batch] madvise(MADV_FREE) EINVAL (STRICT). Aborting.\n");
+                abort();
+            }
             // Fallback to MADV_DONTNEED if MADV_FREE not supported
-            ret = madvise(ptr, size, MADV_DONTNEED);
-            if (ret != 0) {
-                fprintf(stderr, "[Batch] Warning: madvise failed for block %p (size %zu)\n", ptr, size);
+            ret = ss_os_madvise_guarded(ptr, size, MADV_DONTNEED, "batch_dontneed");
+            if (ret != 0 && HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
+                fprintf(stderr, "[Batch] madvise(MADV_DONTNEED) EINVAL (STRICT). Aborting.\n");
+                abort();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -91,6 +91,9 @@ typedef struct {
|
||||
// ===== Cold Path: Batch (1 variable) =====
|
||||
int batch_bg; // HAKMEM_BATCH_BG (default: 0)
|
||||
|
||||
// ===== Cold Path: Superslab Madvise (1 variable) =====
|
||||
int ss_madvise_strict; // HAKMEM_SS_MADVISE_STRICT (default: 1)
|
||||
|
||||
} HakEnvCache;
|
||||
|
||||
// Global cache instance (initialized once at startup)
|
||||
@ -289,10 +292,17 @@ static inline void hakmem_env_cache_init(void) {
|
||||
g_hak_env_cache.batch_bg = (e && atoi(e) != 0) ? 1 : 0; // default: 0 (OFF)
|
||||
}
|
||||
|
||||
// ===== Cold Path: Superslab Madvise =====
|
||||
{
|
||||
const char* e = getenv("HAKMEM_SS_MADVISE_STRICT");
|
||||
// Default: 1 (STRICT), set HAKMEM_SS_MADVISE_STRICT=0 to relax
|
||||
g_hak_env_cache.ss_madvise_strict = (e && *e && *e == '0') ? 0 : 1;
|
||||
}
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Debug: Print cache summary (stderr only)
|
||||
if (!g_hak_env_cache.quiet) {
|
||||
fprintf(stderr, "[ENV_CACHE_INIT] Parsed %d ENV variables at startup\n", 49);
|
||||
fprintf(stderr, "[ENV_CACHE_INIT] Parsed %d ENV variables at startup\n", 50);
|
||||
fprintf(stderr, "[ENV_CACHE_INIT] Hot path syscalls eliminated: ~2000/sec → 0/sec\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
@ -361,4 +371,7 @@ static inline void hakmem_env_cache_init(void) {
|
||||
// Cold path: Batch
|
||||
#define HAK_ENV_BATCH_BG() (g_hak_env_cache.batch_bg)
|
||||
|
||||
// Cold path: Superslab Madvise
|
||||
#define HAK_ENV_SS_MADVISE_STRICT() (g_hak_env_cache.ss_madvise_strict)
|
||||
|
||||
#endif // HAKMEM_ENV_CACHE_H
|
||||
|
||||
@ -49,6 +49,7 @@
|
||||
#include "hakmem_l25_pool.h"
|
||||
#include "hakmem_config.h"
|
||||
#include "hakmem_internal.h" // For AllocHeader and HAKMEM_MAGIC
|
||||
#include "box/ss_os_acquire_box.h"
|
||||
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
|
||||
#include "box/pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_L25)
|
||||
#include "page_arena.h" // Phase 24: PageArena integration for L25
|
||||
@ -560,7 +561,7 @@ void hak_l25_pool_free_fast(void* user_ptr, uintptr_t site_id) {
|
||||
|
||||
// Optional: demand-zero for larger classes
|
||||
if (g_l25_pool.demand_zero && class_idx >= 3) {
|
||||
-        madvise((char*)raw, HEADER_SIZE + g_class_sizes[class_idx], MADV_DONTNEED);
+        (void)ss_os_madvise_guarded((char*)raw, HEADER_SIZE + g_class_sizes[class_idx], MADV_DONTNEED, "l25_pool_dontneed_class");
|
||||
}
|
||||
|
||||
// Same-thread hint: prefer per-block owner if header present (HDR_LIGHT>=1), else page owner
|
||||
@ -1118,7 +1119,7 @@ void hak_l25_pool_free(void* ptr, size_t size, uintptr_t site_id) {
|
||||
if (g_l25_pool.demand_zero) {
|
||||
int class_idx_dz = hak_l25_pool_get_class_index(size);
|
||||
if (class_idx_dz >= 3) {
|
||||
-            madvise((char*)raw, HEADER_SIZE + size, MADV_DONTNEED);
+            (void)ss_os_madvise_guarded((char*)raw, HEADER_SIZE + size, MADV_DONTNEED, "l25_pool_dontneed_size");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -46,7 +46,9 @@
|
||||
#include "hakmem_pool.h"
|
||||
#include "hakmem_config.h"
|
||||
#include "hakmem_internal.h" // For AllocHeader and HAKMEM_MAGIC
|
||||
#include "box/pool_hotbox_v2_header_box.h"
|
||||
#include "hakmem_syscall.h" // Box 3 syscall layer (bypasses LD_PRELOAD)
|
||||
#include "box/pool_hotbox_v2_box.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
@ -58,6 +60,11 @@
|
||||
#include "hakmem_policy.h" // FrozenPolicy caps (Soft CAP gating)
|
||||
#include "hakmem_debug.h"
|
||||
|
||||
#define POOL_HOTBOX_V2_HEADER_BYTES ((size_t)sizeof(void*))
|
||||
// Use an over-sized mapping to guarantee POOL_PAGE_SIZE alignment for the
|
||||
// v2 page base. This keeps page_of() O(1) without relying on mmap alignment.
|
||||
#define POOL_HOTBOX_V2_MAP_LEN (POOL_PAGE_SIZE * 2)
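
Note (not part of the diff): mapping 2x POOL_PAGE_SIZE guarantees that at least one POOL_PAGE_SIZE-aligned window of POOL_PAGE_SIZE bytes fits inside the mapping regardless of where mmap places it. The align-up used later in pool_cold_refill_page_v1, as a standalone sketch:

// Sketch only: align-up within an over-sized mapping (POOL_PAGE_SIZE is a power of two).
static void* align_up_in_mapping_sketch(void* raw, size_t page_size) {
    uintptr_t aligned = ((uintptr_t)raw + (page_size - 1)) & ~(uintptr_t)(page_size - 1);
    // aligned - (uintptr_t)raw < page_size, so [aligned, aligned + page_size)
    // stays inside the 2 * page_size mapping that starts at raw.
    return (void*)aligned;
}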
|
||||
|
||||
// False sharing mitigation: padded mutex type (64B)
|
||||
typedef struct { pthread_mutex_t m; char _pad[64 - (sizeof(pthread_mutex_t) % 64)]; } PaddedMutex;
|
||||
|
||||
@ -808,6 +815,513 @@ static int g_pool_min_bundle = 2; // env: HAKMEM_POOL_MIN_BUNDLE (default 2)
|
||||
static int g_count_sample_exp = 10; // env: HAKMEM_POOL_COUNT_SAMPLE (0..16)
|
||||
static __thread uint32_t t_pool_rng = 0x243f6a88u; // per-thread RNG for sampling
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PoolHotBox v2 scaffolding (research-only; defaults to v1)
|
||||
// ---------------------------------------------------------------------------
|
||||
PoolHotBoxV2Stats g_pool_hotbox_v2_stats[POOL_NUM_CLASSES];
|
||||
static __thread pool_ctx_v2* g_pool_ctx_v2 = NULL;
|
||||
|
||||
// Forward decls for helpers used in HotBox v2.
|
||||
static inline uint32_t pool_hotbox_v2_block_size(int ci);
|
||||
static inline uint32_t pool_block_size_for_class(int ci);
|
||||
static inline void mid_set_header(AllocHeader* hdr, size_t class_sz, uintptr_t site_id);
|
||||
static inline void mid_page_inuse_inc(void* raw);
|
||||
static void* pool_cold_refill_page_v1(void* cold_ctx, uint32_t ci, uint32_t* out_block_size, uint32_t* out_capacity, void** out_slab_ref);
|
||||
static void pool_cold_retire_page_v1(void* cold_ctx, uint32_t ci, void* slab_ref, void* base);
|
||||
|
||||
static int pool_hotbox_v2_global_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_POOL_V2_ENABLED");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
static unsigned pool_hotbox_v2_class_mask(void) {
|
||||
static int parsed = 0;
|
||||
static unsigned mask = 0;
|
||||
if (__builtin_expect(!parsed, 0)) {
|
||||
const char* e = getenv("HAKMEM_POOL_V2_CLASSES");
|
||||
if (e && *e) {
|
||||
mask = (unsigned)strtoul(e, NULL, 0);
|
||||
} else {
|
||||
mask = 0; // default: all OFF (opt-in only)
|
||||
}
|
||||
parsed = 1;
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
int pool_hotbox_v2_class_enabled(int class_idx) {
|
||||
if (!pool_hotbox_v2_global_enabled()) return 0;
|
||||
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
|
||||
unsigned mask = pool_hotbox_v2_class_mask();
|
||||
static int logged = 0;
|
||||
if (__builtin_expect(!logged && pool_hotbox_v2_stats_enabled(), 0)) {
|
||||
fprintf(stderr, "[POOL_V2_MASK] enabled=0x%x\n", mask);
|
||||
logged = 1;
|
||||
}
|
||||
return (mask & (1u << class_idx)) != 0;
|
||||
}
|
||||
|
||||
int pool_hotbox_v2_stats_enabled(void) {
|
||||
static int g = -1;
|
||||
if (__builtin_expect(g == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_POOL_V2_STATS");
|
||||
g = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g;
|
||||
}
|
||||
|
||||
pool_ctx_v2* pool_v2_tls_get(void) {
|
||||
pool_ctx_v2* ctx = g_pool_ctx_v2;
|
||||
if (__builtin_expect(ctx == NULL, 0)) {
|
||||
ctx = (pool_ctx_v2*)calloc(1, sizeof(pool_ctx_v2));
|
||||
if (!ctx) abort();
|
||||
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
|
||||
uint32_t user_sz = pool_block_size_for_class(i);
|
||||
ctx->cls[i].block_size = user_sz ? (user_sz + HEADER_SIZE) : 0;
|
||||
ctx->cls[i].max_partial_pages = 2;
|
||||
}
|
||||
g_pool_ctx_v2 = ctx;
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline uint32_t pool_hotbox_v2_block_size(int ci) {
|
||||
switch (ci) {
|
||||
case 0: return POOL_CLASS_2KB;
|
||||
case 1: return POOL_CLASS_4KB;
|
||||
case 2: return POOL_CLASS_8KB;
|
||||
case 3: return POOL_CLASS_16KB;
|
||||
case 4: return POOL_CLASS_32KB;
|
||||
case 5: return POOL_CLASS_40KB;
|
||||
case 6: return POOL_CLASS_52KB;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint32_t pool_block_size_for_class(int ci) {
|
||||
return pool_hotbox_v2_block_size(ci);
|
||||
}
|
||||
|
||||
static inline void pool_hotbox_v2_record_alloc(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_calls, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void pool_hotbox_v2_record_alloc_refill(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_refill, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void pool_hotbox_v2_record_alloc_refill_fail(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_refill_fail, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void pool_hotbox_v2_record_alloc_fallback(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void pool_hotbox_v2_record_free(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_calls, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void pool_hotbox_v2_record_free_call(uint32_t ci) {
|
||||
pool_hotbox_v2_record_free(ci);
|
||||
}
|
||||
|
||||
void pool_hotbox_v2_record_free_fallback(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
enum pool_v2_pageof_fail {
|
||||
POOL_V2_PAGEOF_NONE = 0,
|
||||
POOL_V2_PAGEOF_OUT_OF_RANGE = 1,
|
||||
POOL_V2_PAGEOF_MISALIGNED = 2,
|
||||
POOL_V2_PAGEOF_HEADER_MISSING = 3,
|
||||
POOL_V2_PAGEOF_UNKNOWN = 4,
|
||||
};
|
||||
|
||||
static inline void pool_hotbox_v2_record_pageof_fail(uint32_t ci, int reason) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
switch (reason) {
|
||||
case POOL_V2_PAGEOF_HEADER_MISSING:
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_header_missing, 1, memory_order_relaxed);
|
||||
break;
|
||||
case POOL_V2_PAGEOF_OUT_OF_RANGE:
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_out_of_range, 1, memory_order_relaxed);
|
||||
break;
|
||||
case POOL_V2_PAGEOF_MISALIGNED:
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_misaligned, 1, memory_order_relaxed);
|
||||
break;
|
||||
case POOL_V2_PAGEOF_UNKNOWN:
|
||||
default:
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_unknown, 1, memory_order_relaxed);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static pool_page_v2* pool_hotbox_v2_page_acquire(void) {
|
||||
pool_page_v2* p = (pool_page_v2*)calloc(1, sizeof(pool_page_v2));
|
||||
return p;
|
||||
}
|
||||
|
||||
static void pool_hotbox_v2_page_release(pool_page_v2* p) {
|
||||
free(p);
|
||||
}
|
||||
|
||||
static void* pool_hotbox_v2_build_freelist(pool_page_v2* p) {
|
||||
if (!p || !p->base || p->block_size == 0 || p->capacity == 0) return NULL;
|
||||
uint8_t* base = (uint8_t*)p->base + POOL_HOTBOX_V2_HEADER_BYTES;
|
||||
void* head = NULL;
|
||||
for (uint32_t i = 0; i < p->capacity; i++) {
|
||||
void* blk = base + ((size_t)i * p->block_size);
|
||||
*(void**)blk = head;
|
||||
head = blk;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static PoolColdIface pool_cold_iface_v1(void);
|
||||
|
||||
static pool_page_v2* pool_hotbox_v2_page_of(pool_ctx_v2* ctx, uint32_t ci, void* ptr, int* out_reason) {
|
||||
if (out_reason) *out_reason = POOL_V2_PAGEOF_UNKNOWN;
|
||||
if (!ctx || ci >= POOL_NUM_CLASSES || !ptr) return NULL;
|
||||
// Compute page base by mask (POOL_PAGE_SIZE is a power of two).
|
||||
void* page_base = pool_hotbox_v2_page_base(ptr, POOL_PAGE_SIZE);
|
||||
pool_page_v2* p = (pool_page_v2*)pool_hotbox_v2_header_load(page_base);
|
||||
if (!p) {
|
||||
if (out_reason) *out_reason = POOL_V2_PAGEOF_HEADER_MISSING;
|
||||
return NULL;
|
||||
}
|
||||
if (p->class_idx != ci || !p->base) {
|
||||
if (out_reason) *out_reason = POOL_V2_PAGEOF_UNKNOWN;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uint8_t* data_base = (uint8_t*)p->base + POOL_HOTBOX_V2_HEADER_BYTES;
|
||||
size_t span = (size_t)p->block_size * (size_t)p->capacity;
|
||||
uintptr_t off = (uintptr_t)((uint8_t*)ptr - data_base);
|
||||
if (off >= span) {
|
||||
if (out_reason) *out_reason = POOL_V2_PAGEOF_OUT_OF_RANGE;
|
||||
return NULL;
|
||||
}
|
||||
if (off % p->block_size != 0) {
|
||||
if (out_reason) *out_reason = POOL_V2_PAGEOF_MISALIGNED;
|
||||
return NULL;
|
||||
}
|
||||
if (out_reason) *out_reason = POOL_V2_PAGEOF_NONE;
|
||||
return p;
|
||||
}
|
||||
|
||||
static void pool_hotbox_v2_page_retire_slow(pool_ctx_v2* ctx, uint32_t ci, pool_page_v2* p) {
|
||||
(void)ctx;
|
||||
if (!p) return;
|
||||
// Clear reverse header to avoid stale page_of hits.
|
||||
pool_hotbox_v2_header_clear(p->base);
|
||||
PoolColdIface cold = pool_cold_iface_v1();
|
||||
if (cold.retire_page) {
|
||||
void* cold_ctx = NULL;
|
||||
cold.retire_page(cold_ctx, ci, p->slab_ref, p->base);
|
||||
}
|
||||
pool_hotbox_v2_page_release(p);
|
||||
}
|
||||
|
||||
static void pool_hotbox_v2_push_partial(pool_class_v2* hc, pool_page_v2* p) {
|
||||
if (!hc || !p) return;
|
||||
p->next = hc->partial;
|
||||
hc->partial = p;
|
||||
if (hc->partial_count < UINT16_MAX) hc->partial_count++;
|
||||
}
|
||||
|
||||
static pool_page_v2* pool_hotbox_v2_pop_partial(pool_class_v2* hc) {
|
||||
if (!hc || !hc->partial) return NULL;
|
||||
pool_page_v2* p = hc->partial;
|
||||
hc->partial = p->next;
|
||||
p->next = NULL;
|
||||
if (hc->partial_count > 0) hc->partial_count--;
|
||||
return p;
|
||||
}
|
||||
|
||||
static pool_page_v2* pool_hotbox_v2_take_usable_partial(pool_class_v2* hc) {
|
||||
if (!hc) return NULL;
|
||||
pool_page_v2* prev = NULL;
|
||||
pool_page_v2* p = hc->partial;
|
||||
while (p) {
|
||||
if (p->freelist && p->used < p->capacity) {
|
||||
if (prev) {
|
||||
prev->next = p->next;
|
||||
} else {
|
||||
hc->partial = p->next;
|
||||
}
|
||||
p->next = NULL;
|
||||
if (hc->partial_count > 0) hc->partial_count--;
|
||||
return p;
|
||||
}
|
||||
prev = p;
|
||||
p = p->next;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int pool_hotbox_v2_unlink_partial(pool_class_v2* hc, pool_page_v2* target) {
|
||||
if (!hc || !target) return 0;
|
||||
pool_page_v2* prev = NULL;
|
||||
pool_page_v2* p = hc->partial;
|
||||
while (p) {
|
||||
if (p == target) {
|
||||
if (prev) {
|
||||
prev->next = p->next;
|
||||
} else {
|
||||
hc->partial = p->next;
|
||||
}
|
||||
p->next = NULL;
|
||||
if (hc->partial_count > 0) hc->partial_count--;
|
||||
return 1;
|
||||
}
|
||||
prev = p;
|
||||
p = p->next;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pool_hotbox_v2_record_alloc_fast(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_fast, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static void pool_hotbox_v2_record_free_fast(uint32_t ci) {
|
||||
if ((int)ci >= POOL_NUM_CLASSES) return;
|
||||
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_fast, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void* pool_hotbox_v2_alloc_fast(pool_ctx_v2* ctx, uint32_t ci, uintptr_t site_id) {
|
||||
pool_class_v2* hc = &ctx->cls[ci];
|
||||
pool_page_v2* p = hc->current;
|
||||
if (p && p->freelist && p->used < p->capacity) {
|
||||
void* blk = p->freelist;
|
||||
p->freelist = *(void**)blk;
|
||||
p->used++;
|
||||
pool_hotbox_v2_record_alloc_fast(ci);
|
||||
AllocHeader* hdr = (AllocHeader*)blk;
|
||||
size_t class_sz = pool_hotbox_v2_block_size((int)ci);
|
||||
mid_set_header(hdr, class_sz, site_id);
|
||||
mid_page_inuse_inc(blk);
|
||||
return (char*)blk + HEADER_SIZE;
|
||||
}
|
||||
if (p) {
|
||||
// Keep exhausted current reachable for free()
|
||||
pool_hotbox_v2_push_partial(hc, p);
|
||||
hc->current = NULL;
|
||||
}
|
||||
p = pool_hotbox_v2_take_usable_partial(hc);
|
||||
if (p) {
|
||||
hc->current = p;
|
||||
void* blk = p->freelist;
|
||||
p->freelist = *(void**)blk;
|
||||
p->used++;
|
||||
pool_hotbox_v2_record_alloc_fast(ci);
|
||||
AllocHeader* hdr = (AllocHeader*)blk;
|
||||
size_t class_sz = pool_hotbox_v2_block_size((int)ci);
|
||||
mid_set_header(hdr, class_sz, site_id);
|
||||
mid_page_inuse_inc(blk);
|
||||
return (char*)blk + HEADER_SIZE;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void pool_hotbox_v2_page_init(pool_page_v2* p, uint32_t ci, void* base, uint32_t block_size, uint32_t capacity, void* slab_ref) {
|
||||
if (!p) return;
|
||||
// Adjust capacity if caller did not account for header reservation.
|
||||
size_t avail = (POOL_PAGE_SIZE > POOL_HOTBOX_V2_HEADER_BYTES) ? (POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) : 0;
|
||||
if (block_size > 0) {
|
||||
uint32_t max_cap = (uint32_t)(avail / (size_t)block_size);
|
||||
if (capacity == 0 || capacity > max_cap) capacity = max_cap;
|
||||
}
|
||||
p->freelist = NULL;
|
||||
p->used = 0;
|
||||
p->capacity = capacity;
|
||||
p->block_size = block_size;
|
||||
p->class_idx = ci;
|
||||
p->base = base;
|
||||
p->slab_ref = slab_ref;
|
||||
p->next = NULL;
|
||||
pool_hotbox_v2_header_store(p->base, p);
|
||||
}
|
||||
|
||||
static PoolColdIface pool_cold_iface_v1(void) {
|
||||
PoolColdIface iface = {pool_cold_refill_page_v1, pool_cold_retire_page_v1};
|
||||
return iface;
|
||||
}
|
||||
|
||||
static void* pool_cold_refill_page_v1(void* cold_ctx, uint32_t ci, uint32_t* out_block_size, uint32_t* out_capacity, void** out_slab_ref) {
|
||||
(void)cold_ctx;
|
||||
uint32_t user_sz = pool_hotbox_v2_block_size((int)ci);
|
||||
if (user_sz == 0) return NULL;
|
||||
uint32_t bs = user_sz + HEADER_SIZE;
|
||||
if (bs == 0) return NULL;
|
||||
uint32_t cap = 0;
|
||||
if (POOL_PAGE_SIZE > POOL_HOTBOX_V2_HEADER_BYTES) {
|
||||
cap = (uint32_t)((POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) / bs);
|
||||
}
|
||||
if (cap == 0) return NULL;
|
||||
|
||||
// Over-allocate so we can align to POOL_PAGE_SIZE (64KiB) for O(1) page_of.
|
||||
void* raw = mmap(NULL, POOL_HOTBOX_V2_MAP_LEN, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (raw == MAP_FAILED || !raw) {
|
||||
return NULL;
|
||||
}
|
||||
uintptr_t aligned = ((uintptr_t)raw + (POOL_PAGE_SIZE - 1)) & ~((uintptr_t)POOL_PAGE_SIZE - 1);
|
||||
void* base = (void*)aligned;
|
||||
|
||||
// Register page ownership for same-thread fast free consistency.
|
||||
mid_desc_register(base, (int)ci, (uint64_t)(uintptr_t)pthread_self());
|
||||
g_pool.refills[ci]++;
|
||||
g_pool.total_pages_allocated++;
|
||||
g_pool.pages_by_class[ci]++;
|
||||
g_pool.total_bytes_allocated += POOL_HOTBOX_V2_MAP_LEN;
|
||||
|
||||
if (out_block_size) *out_block_size = bs;
|
||||
if (out_capacity) *out_capacity = cap;
|
||||
// slab_ref keeps the raw mapping pointer for unmap.
|
||||
if (out_slab_ref) *out_slab_ref = raw;
|
||||
return base;
|
||||
}
|
||||
|
||||
static void pool_cold_retire_page_v1(void* cold_ctx, uint32_t ci, void* slab_ref, void* base) {
|
||||
(void)cold_ctx;
|
||||
(void)ci;
|
||||
void* addr = slab_ref ? slab_ref : base;
|
||||
if (!addr) return;
|
||||
if (ci < POOL_NUM_CLASSES) {
|
||||
if (g_pool.pages_by_class[ci] > 0) g_pool.pages_by_class[ci]--;
|
||||
}
|
||||
if (g_pool.total_pages_allocated > 0) g_pool.total_pages_allocated--;
|
||||
if (g_pool.total_bytes_allocated >= POOL_HOTBOX_V2_MAP_LEN) g_pool.total_bytes_allocated -= POOL_HOTBOX_V2_MAP_LEN;
|
||||
munmap(addr, POOL_HOTBOX_V2_MAP_LEN);
|
||||
}
|
||||
|
||||
void* pool_hotbox_v2_alloc(uint32_t class_idx, size_t size, uintptr_t site_id) {
|
||||
(void)size;
|
||||
(void)site_id;
|
||||
if ((int)class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return NULL;
|
||||
pool_hotbox_v2_record_alloc(class_idx);
|
||||
|
||||
pool_ctx_v2* ctx = pool_v2_tls_get();
|
||||
void* blk = pool_hotbox_v2_alloc_fast(ctx, class_idx, site_id);
|
||||
if (blk) return blk;
|
||||
|
||||
// slow: refill via Cold IF
|
||||
PoolColdIface cold = pool_cold_iface_v1();
|
||||
uint32_t bs = 0, cap = 0;
|
||||
void* slab_ref = NULL;
|
||||
void* base = cold.refill_page ? cold.refill_page(NULL, class_idx, &bs, &cap, &slab_ref) : NULL;
|
||||
if (!base || !bs || !cap) {
|
||||
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pool_class_v2* hc = &ctx->cls[class_idx];
|
||||
pool_page_v2* page = pool_hotbox_v2_page_acquire();
|
||||
if (!page) {
|
||||
if (cold.retire_page) cold.retire_page(NULL, class_idx, slab_ref, base);
|
||||
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
|
||||
return NULL;
|
||||
}
|
||||
pool_hotbox_v2_page_init(page, class_idx, base, bs, cap, slab_ref);
|
||||
page->freelist = pool_hotbox_v2_build_freelist(page);
|
||||
if (!page->freelist) {
|
||||
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
|
||||
if (cold.retire_page) cold.retire_page(NULL, class_idx, slab_ref, base);
|
||||
pool_hotbox_v2_page_release(page);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hc->current = page;
|
||||
pool_hotbox_v2_record_alloc_refill(class_idx);
|
||||
return pool_hotbox_v2_alloc_fast(ctx, class_idx, site_id);
|
||||
}
|
||||
|
||||
int pool_hotbox_v2_free(uint32_t class_idx, void* raw_block) {
|
||||
if (!raw_block || (int)class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
|
||||
pool_hotbox_v2_record_free(class_idx);
|
||||
|
||||
pool_ctx_v2* ctx = pool_v2_tls_get();
|
||||
|
||||
int pageof_reason = POOL_V2_PAGEOF_UNKNOWN;
|
||||
pool_page_v2* p = pool_hotbox_v2_page_of(ctx, class_idx, raw_block, &pageof_reason);
|
||||
if (!p) {
|
||||
pool_hotbox_v2_record_pageof_fail(class_idx, pageof_reason);
|
||||
if (pool_hotbox_v2_stats_enabled()) {
|
||||
static _Atomic uint32_t dbg = 0;
|
||||
uint32_t n = atomic_fetch_add_explicit(&dbg, 1, memory_order_relaxed);
|
||||
if (n < 4) {
|
||||
pool_class_v2* hc = &ctx->cls[class_idx];
|
||||
fprintf(stderr,
|
||||
"[POOL_V2 page_of_fail] cls=%u ptr=%p reason=%d cur=%p cur_base=%p cur_cap=%u cur_bs=%u partial=%p\n",
|
||||
class_idx, raw_block, pageof_reason,
|
||||
(void*)hc->current,
|
||||
hc->current ? hc->current->base : NULL,
|
||||
hc->current ? hc->current->capacity : 0u,
|
||||
hc->current ? hc->current->block_size : 0u,
|
||||
(void*)hc->partial);
|
||||
}
|
||||
}
|
||||
return 0; // let caller fall back to v1
|
||||
}
|
||||
|
||||
*(void**)raw_block = p->freelist;
|
||||
p->freelist = raw_block;
|
||||
if (p->used > 0) p->used--;
|
||||
pool_hotbox_v2_record_free_fast(class_idx);
|
||||
|
||||
pool_class_v2* hc = &ctx->cls[class_idx];
|
||||
if (p->used == 0) {
|
||||
pool_hotbox_v2_unlink_partial(hc, p);
|
||||
if (hc->current == p) hc->current = NULL;
|
||||
if (hc->partial_count < hc->max_partial_pages) {
|
||||
pool_hotbox_v2_push_partial(hc, p);
|
||||
} else {
|
||||
pool_hotbox_v2_page_retire_slow(ctx, class_idx, p);
|
||||
}
|
||||
} else {
|
||||
if (!hc->current) hc->current = p;
|
||||
}
|
||||
return 1;
|
||||
}
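// Hedged caller sketch: both v2 entry points are written so the caller can always
// fall back to the legacy pool. hak_pool_v1_alloc()/hak_pool_v1_free() below are
// placeholders, not the real v1 symbols.
static void* pool_front_alloc_sketch(uint32_t ci, size_t size, uintptr_t site_id) {
    void* p = pool_hotbox_v2_alloc(ci, size, site_id);
    return p ? p : hak_pool_v1_alloc(ci, size, site_id);   // NULL => class off or refill failed
}
static void pool_front_free_sketch(uint32_t ci, void* p) {
    if (!pool_hotbox_v2_free(ci, p)) {
        hak_pool_v1_free(ci, p);                            // 0 => page_of failed, not a v2 page
    }
}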
|
||||
|
||||
__attribute__((destructor)) static void pool_hotbox_v2_dump_stats(void) {
|
||||
if (!pool_hotbox_v2_stats_enabled()) return;
|
||||
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
|
||||
uint64_t ac = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_calls, memory_order_relaxed);
|
||||
uint64_t ar = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_refill, memory_order_relaxed);
|
||||
uint64_t arf = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_refill_fail, memory_order_relaxed);
|
||||
uint64_t afb = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_fallback_v1, memory_order_relaxed);
|
||||
uint64_t fc = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_calls, memory_order_relaxed);
|
||||
uint64_t ffb = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_fallback_v1, memory_order_relaxed);
|
||||
uint64_t af = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_fast, memory_order_relaxed);
|
||||
uint64_t ff = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_fast, memory_order_relaxed);
|
||||
uint64_t pf_hdr = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_header_missing, memory_order_relaxed);
|
||||
uint64_t pf_range = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_out_of_range, memory_order_relaxed);
|
||||
uint64_t pf_mis = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_misaligned, memory_order_relaxed);
|
||||
uint64_t pf_unknown = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_unknown, memory_order_relaxed);
|
||||
if (ac || afb || fc || ffb || ar || arf || af || ff || pf_hdr || pf_range || pf_mis || pf_unknown) {
|
||||
fprintf(stderr, "[POOL_V2_STATS] cls=%d alloc_calls=%llu alloc_fast=%llu alloc_refill=%llu alloc_refill_fail=%llu alloc_fb_v1=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu pageof_hdr=%llu pageof_range=%llu pageof_misaligned=%llu pageof_unknown=%llu\n",
|
||||
i, (unsigned long long)ac, (unsigned long long)af, (unsigned long long)ar,
|
||||
(unsigned long long)arf, (unsigned long long)afb,
|
||||
(unsigned long long)fc, (unsigned long long)ff, (unsigned long long)ffb,
|
||||
(unsigned long long)pf_hdr, (unsigned long long)pf_range, (unsigned long long)pf_mis, (unsigned long long)pf_unknown);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Size class table (for O(1) lookup). Index 5/6 are Bridge classes for 32-64KB gap.
|
||||
// 7 classes including Bridge classes (40KB, 52KB) to fill 32-64KB gap
|
||||
static size_t g_class_sizes[POOL_NUM_CLASSES] = {
|
||||
@ -893,10 +1407,9 @@ int hak_pool_get_shard_index(uintptr_t site_id) {
|
||||
return (int)((uint32_t)x & (POOL_NUM_SHARDS - 1));
|
||||
}
|
||||
|
||||
// TLS helpers
|
||||
// TLS helpers (non-inline helpers for shard bookkeeping)
|
||||
#include "box/pool_tls_core.inc.h"
|
||||
|
||||
|
||||
// Refill/ACE (boxed)
|
||||
#include "box/pool_refill.inc.h"
|
||||
|
||||
|
||||
@ -5,9 +5,12 @@
|
||||
|
||||
#include "hakmem_sys.h"
|
||||
#include "hakmem_debug.h"
|
||||
#include "hakmem_env_cache.h" // For HAK_ENV_SS_MADVISE_STRICT
|
||||
#include "box/ss_os_acquire_box.h"
|
||||
#include <sys/mman.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h> // For errno values
|
||||
|
||||
// madvise constants (Linux)
|
||||
#ifndef MADV_DONTNEED
|
||||
@ -56,12 +59,16 @@ void hkm_sys_madvise_dontneed(void* ptr, size_t size) {
|
||||
|
||||
HKM_TIME_START(t0);
|
||||
|
||||
int ret = madvise(ptr, size, MADV_DONTNEED);
|
||||
int ret = ss_os_madvise_guarded(ptr, size, MADV_DONTNEED, "hakmem_sys_dontneed");
|
||||
|
||||
HKM_TIME_END(HKM_CAT_SYSCALL_MADVISE, t0);
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "[HAKMEM SYS] madvise(DONTNEED, %p, %zu) failed\n", ptr, size);
|
||||
fprintf(stderr, "[HAKMEM SYS] madvise(DONTNEED, %p, %zu) failed errno=%d\n", ptr, size, errno);
|
||||
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
|
||||
fprintf(stderr, "[HAKMEM SYS] Critical: madvise(DONTNEED) failed with EINVAL in strict mode. Aborting.\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -70,11 +77,15 @@ void hkm_sys_madvise_willneed(void* ptr, size_t size) {
|
||||
|
||||
HKM_TIME_START(t0);
|
||||
|
||||
int ret = madvise(ptr, size, MADV_WILLNEED);
|
||||
int ret = ss_os_madvise_guarded(ptr, size, MADV_WILLNEED, "hakmem_sys_willneed");
|
||||
|
||||
HKM_TIME_END(HKM_CAT_SYSCALL_MADVISE, t0);
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf(stderr, "[HAKMEM SYS] madvise(WILLNEED, %p, %zu) failed\n", ptr, size);
|
||||
fprintf(stderr, "[HAKMEM SYS] madvise(WILLNEED, %p, %zu) failed errno=%d\n", ptr, size, errno);
|
||||
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
|
||||
fprintf(stderr, "[HAKMEM SYS] Critical: madvise(WILLNEED) failed with EINVAL in strict mode. Aborting.\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
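// Minimal sketch of the guard both call sites now use. It assumes the intent in the
// commit title (tolerate ENOMEM, e.g. advice over a partially unmapped range) and the
// signature declared later in this diff; the real ss_os_madvise_guarded() in
// box/ss_os_acquire_box.h may behave differently.
static int ss_os_madvise_guarded_sketch(void* ptr, size_t len, int advice, const char* where) {
    int ret = madvise(ptr, len, advice);
    if (ret != 0 && errno == ENOMEM) {
        return 0;   // benign: range no longer (fully) mapped, treat as a no-op
    }
    if (ret != 0) {
        fprintf(stderr, "[SS_OS] madvise(%d) failed at %s errno=%d\n", advice, where, errno);
    }
    return ret;
}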
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
|
||||
#include "box/ptr_conversion_box.h" // Box API: pointer conversion
|
||||
#include "hakmem_env_cache.h" // Priority-2: ENV cache
|
||||
#include "box/tiny_cold_iface_v1.h" // Cold boundary wrapper for TinyHotHeap v2
|
||||
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
|
||||
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
|
||||
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
|
||||
@ -24,6 +25,8 @@
|
||||
#include "tiny_route.h"
|
||||
#include "front/tiny_heap_v2.h"
|
||||
#include "box/tiny_front_stats_box.h"
|
||||
#include "box/tiny_front_v3_env_box.h"
|
||||
#include "box/ss_os_acquire_box.h"
|
||||
#include "tiny_tls_guard.h"
|
||||
#include "tiny_ready.h"
|
||||
#include "box/c7_meta_used_counter_box.h"
|
||||
@ -32,6 +35,8 @@
|
||||
#include "box/tiny_hotheap_v2_box.h"
|
||||
#include "box/tiny_route_env_box.h"
|
||||
#include "box/super_reg_box.h"
|
||||
#include "tiny_region_id.h"
|
||||
#include "tiny_debug_api.h"
|
||||
#include "hakmem_tiny_tls_list.h"
|
||||
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
|
||||
#include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue
|
||||
@ -59,6 +64,13 @@ tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0};
|
||||
int g_tiny_route_snapshot_done = 0;
|
||||
_Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0};
|
||||
_Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0};
|
||||
TinyFrontV3Snapshot g_tiny_front_v3_snapshot = {0};
|
||||
int g_tiny_front_v3_snapshot_ready = 0;
|
||||
static TinyFrontV3SizeClassEntry g_tiny_front_v3_lut[TINY_MAX_SIZE + 1] = {0};
|
||||
static int g_tiny_front_v3_lut_ready = 0;
|
||||
|
||||
// Forward decls (to keep deps light in this TU)
|
||||
int unified_cache_enabled(void);
|
||||
|
||||
static int tiny_heap_stats_dump_enabled(void) {
|
||||
static int g = -1;
|
||||
@ -70,6 +82,59 @@ static int tiny_heap_stats_dump_enabled(void) {
|
||||
return g;
|
||||
}
|
||||
|
||||
void tiny_front_v3_snapshot_init(void) {
|
||||
if (g_tiny_front_v3_snapshot_ready) {
|
||||
return;
|
||||
}
|
||||
TinyFrontV3Snapshot snap = {
|
||||
.unified_cache_on = unified_cache_enabled(),
|
||||
.tiny_guard_on = tiny_guard_is_enabled(),
|
||||
.header_mode = (uint8_t)tiny_header_mode(),
|
||||
.header_v3_enabled = tiny_header_v3_enabled(),
|
||||
.header_v3_skip_c7 = tiny_header_v3_skip_c7(),
|
||||
};
|
||||
g_tiny_front_v3_snapshot = snap;
|
||||
g_tiny_front_v3_snapshot_ready = 1;
|
||||
}
|
||||
|
||||
void tiny_front_v3_size_class_lut_init(void) {
|
||||
if (g_tiny_front_v3_lut_ready) {
|
||||
return;
|
||||
}
|
||||
tiny_route_snapshot_init();
|
||||
size_t max_size = tiny_get_max_size();
|
||||
if (max_size > TINY_MAX_SIZE) {
|
||||
max_size = TINY_MAX_SIZE;
|
||||
}
|
||||
for (size_t sz = 0; sz <= TINY_MAX_SIZE; sz++) {
|
||||
TinyFrontV3SizeClassEntry e = {
|
||||
.class_idx = TINY_FRONT_V3_INVALID_CLASS,
|
||||
.route_kind = (uint8_t)TINY_ROUTE_LEGACY,
|
||||
};
|
||||
if (sz == 0 || sz > max_size) {
|
||||
g_tiny_front_v3_lut[sz] = e;
|
||||
continue;
|
||||
}
|
||||
int cls = hak_tiny_size_to_class((int)sz);
|
||||
if (cls >= 0 && cls < TINY_NUM_CLASSES) {
|
||||
e.class_idx = (uint8_t)cls;
|
||||
e.route_kind = (uint8_t)tiny_route_for_class((uint8_t)cls);
|
||||
}
|
||||
g_tiny_front_v3_lut[sz] = e;
|
||||
}
|
||||
g_tiny_front_v3_lut_ready = 1;
|
||||
}
|
||||
|
||||
const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size) {
|
||||
if (__builtin_expect(!g_tiny_front_v3_lut_ready, 0)) {
|
||||
tiny_front_v3_size_class_lut_init();
|
||||
}
|
||||
if (size == 0 || size > TINY_MAX_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
return &g_tiny_front_v3_lut[size];
|
||||
}
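// Hedged usage sketch for the LUT: one table read per request, then dispatch by route.
// tiny_legacy_alloc() is a placeholder for the pre-v3 path; the actual front wiring may differ.
static void* tiny_front_v3_alloc_sketch(size_t size) {
    const TinyFrontV3SizeClassEntry* e = tiny_front_v3_lut_lookup(size);
    if (!e || e->class_idx == TINY_FRONT_V3_INVALID_CLASS) {
        return tiny_legacy_alloc(size);                 // size 0 or above TINY_MAX_SIZE
    }
    if (e->route_kind == (uint8_t)TINY_ROUTE_LEGACY) {
        return tiny_legacy_alloc(size);                 // class not routed to the v2 heap
    }
    return tiny_hotheap_v2_alloc(e->class_idx);         // routed classes (currently C6/C7)
}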
|
||||
|
||||
__attribute__((destructor))
|
||||
static void tiny_heap_stats_dump(void) {
|
||||
if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
|
||||
@ -159,16 +224,24 @@ static inline int tiny_hotheap_v2_stats_enabled(void) {
|
||||
return g;
|
||||
}
|
||||
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_calls = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_route_hits = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_fast = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_lease = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_fallback_v1 = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_refill = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_route_fb = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_calls = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_fast = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_fallback_v1 = 0;
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static _Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
|
||||
typedef struct {
|
||||
_Atomic uint64_t prepare_calls;
|
||||
@ -178,34 +251,54 @@ typedef struct {
|
||||
_Atomic uint64_t page_retired;
|
||||
} TinyHotHeapV2PageStats;
|
||||
|
||||
static TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats = {0};
|
||||
static TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES] = {0};
|
||||
static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
|
||||
uint8_t class_idx,
|
||||
tiny_hotheap_page_v2* page);
|
||||
|
||||
void tiny_hotheap_v2_record_route_fallback(void) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, 1, memory_order_relaxed);
|
||||
static inline uint8_t tiny_hotheap_v2_idx(uint8_t class_idx) {
|
||||
return (class_idx < TINY_HOTHEAP_MAX_CLASSES) ? class_idx : 0;
|
||||
}
|
||||
|
||||
void tiny_hotheap_v2_record_free_fallback(void) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, 1, memory_order_relaxed);
|
||||
void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[tiny_hotheap_v2_idx(class_idx)],
|
||||
1,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
|
||||
void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[tiny_hotheap_v2_idx(class_idx)],
|
||||
1,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
|
||||
void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out) {
|
||||
if (!out) return;
|
||||
memset(out, 0, sizeof(*out));
|
||||
out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_c7_route_hits, memory_order_relaxed);
|
||||
out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_calls, memory_order_relaxed);
|
||||
out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fast, memory_order_relaxed);
|
||||
out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_lease, memory_order_relaxed);
|
||||
out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_refill, memory_order_relaxed);
|
||||
out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, memory_order_relaxed);
|
||||
out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, memory_order_relaxed);
|
||||
out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_calls, memory_order_relaxed);
|
||||
out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fast, memory_order_relaxed);
|
||||
out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, memory_order_relaxed);
|
||||
out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_calls, memory_order_relaxed);
|
||||
out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_with_current_null, memory_order_relaxed);
|
||||
out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_from_partial, memory_order_relaxed);
|
||||
out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, memory_order_relaxed);
|
||||
out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.page_retired, memory_order_relaxed);
|
||||
uint8_t ci = 7;
|
||||
out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
|
||||
out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
|
||||
out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
|
||||
out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
|
||||
out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed);
|
||||
out->refill_with_current = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
|
||||
out->refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
|
||||
out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
|
||||
out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed);
|
||||
out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
|
||||
out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
|
||||
out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
|
||||
out->cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
|
||||
out->cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
|
||||
out->retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
|
||||
out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed);
|
||||
out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed);
|
||||
out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed);
|
||||
out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed);
|
||||
out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed);
|
||||
out->partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
|
||||
out->partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
|
||||
out->partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
|
||||
}
|
||||
|
||||
static tiny_hotheap_page_v2* tiny_hotheap_v2_acquire_page_node(tiny_hotheap_class_v2* hc) {
|
||||
@ -246,6 +339,57 @@ static tiny_hotheap_page_v2* tiny_hotheap_v2_find_page(tiny_hotheap_class_v2* hc
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void tiny_hotheap_v2_partial_push(tiny_hotheap_class_v2* hc,
|
||||
tiny_hotheap_page_v2* page,
|
||||
uint8_t class_idx,
|
||||
int stats_on) {
|
||||
if (!hc || !page) return;
|
||||
page->next = hc->partial_pages;
|
||||
hc->partial_pages = page;
|
||||
if (hc->partial_count < UINT16_MAX) {
|
||||
hc->partial_count++;
|
||||
}
|
||||
if (stats_on) {
|
||||
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pushes[idx], 1, memory_order_relaxed);
|
||||
uint64_t cur = hc->partial_count;
|
||||
uint64_t old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
|
||||
while (cur > old &&
|
||||
!atomic_compare_exchange_weak_explicit(&g_tiny_hotheap_v2_partial_peak[idx],
|
||||
&old,
|
||||
cur,
|
||||
memory_order_relaxed,
|
||||
memory_order_relaxed)) {
|
||||
old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
}
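// The peak update above is the usual "atomic relaxed max" idiom; a standalone sketch:
static inline void atomic_relaxed_max_u64(_Atomic uint64_t* peak, uint64_t value) {
    uint64_t old = atomic_load_explicit(peak, memory_order_relaxed);
    while (value > old &&
           !atomic_compare_exchange_weak_explicit(peak, &old, value,
                                                  memory_order_relaxed,
                                                  memory_order_relaxed)) {
        // a failed CAS reloads old; the loop exits once another writer has stored >= value
    }
}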
|
||||
|
||||
static inline void tiny_hotheap_v2_maybe_trim_partial(tiny_hotheap_ctx_v2* ctx,
|
||||
tiny_hotheap_class_v2* hc,
|
||||
uint8_t class_idx,
|
||||
int stats_on) {
|
||||
if (!ctx || !hc) return;
|
||||
uint16_t limit = hc->max_partial_pages;
|
||||
if (limit == 0) {
|
||||
return;
|
||||
}
|
||||
while (hc->partial_count > limit && hc->partial_pages) {
|
||||
tiny_hotheap_page_v2* victim = hc->partial_pages;
|
||||
hc->partial_pages = victim->next;
|
||||
if (hc->partial_count > 0) {
|
||||
hc->partial_count--;
|
||||
}
|
||||
victim->next = NULL;
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[tiny_hotheap_v2_idx(class_idx)],
|
||||
1,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
tiny_hotheap_v2_page_retire_slow(ctx, class_idx, victim);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page,
|
||||
uint8_t class_idx,
|
||||
uint16_t stride) {
|
||||
@ -265,16 +409,6 @@ static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page,
|
||||
head = block;
|
||||
}
|
||||
page->freelist = head;
|
||||
if (page->lease_page) {
|
||||
page->lease_page->free_list = head;
|
||||
page->lease_page->used = page->used;
|
||||
if (page->lease_page->meta) {
|
||||
atomic_store_explicit(&page->lease_page->meta->freelist, head, memory_order_release);
|
||||
if (page->lease_page->meta->carved < page->capacity) {
|
||||
page->lease_page->meta->carved = page->capacity;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* target) {
|
||||
@ -295,6 +429,9 @@ static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_
|
||||
*head = cur->next;
|
||||
}
|
||||
cur->next = NULL;
|
||||
if (i == 0 && hc->partial_count > 0) {
|
||||
hc->partial_count--;
|
||||
}
|
||||
break;
|
||||
}
|
||||
prev = cur;
|
||||
@ -304,17 +441,35 @@ static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_
|
||||
}
|
||||
|
||||
static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx) {
|
||||
if (!ctx || class_idx != 7) {
|
||||
if (!ctx || class_idx >= TINY_HOTHEAP_MAX_CLASSES) {
|
||||
return NULL;
|
||||
}
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_refill, 1, memory_order_relaxed);
|
||||
TinyHeapClassStats* stats = tiny_heap_stats_for_class(7);
|
||||
int stats_on = tiny_hotheap_v2_stats_enabled();
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_refill[class_idx], 1, memory_order_relaxed);
|
||||
TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
|
||||
if (__builtin_expect(stats != NULL, 0)) {
|
||||
atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
|
||||
}
|
||||
tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
|
||||
TinyHeapPageLease lease = tiny_heap_c7_lease_page_for_v2();
|
||||
if (!lease.page) {
|
||||
if (hc) {
|
||||
if (hc->current_page) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_current[class_idx],
|
||||
1,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
if (hc->partial_pages) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_partial[class_idx],
|
||||
1,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
// Borrow one page from the cold iface (v1 TinyHeap)
|
||||
TinyColdIface cold = tiny_cold_iface_v1();
|
||||
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
|
||||
tiny_heap_page_t* ipage = cold.refill_page ? cold.refill_page(cold_ctx, class_idx) : NULL;
|
||||
if (!ipage || !ipage->base || ipage->capacity == 0 || ipage->meta == NULL) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_refill_fail[class_idx], 1, memory_order_relaxed);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -327,33 +482,25 @@ static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ct
|
||||
return NULL;
|
||||
}
|
||||
|
||||
page->lease_page = lease.page;
|
||||
page->meta = lease.meta;
|
||||
page->ss = lease.ss;
|
||||
page->base = lease.base;
|
||||
page->capacity = lease.capacity;
|
||||
page->slab_idx = lease.slab_idx;
|
||||
page->freelist = lease.freelist;
|
||||
page->used = lease.page->used;
|
||||
if (page->lease_page) {
|
||||
page->lease_page->capacity = page->capacity;
|
||||
page->lease_page->free_list = page->freelist;
|
||||
page->lease_page->base = (uint8_t*)page->base;
|
||||
}
|
||||
page->lease_page = ipage;
|
||||
page->meta = ipage->meta;
|
||||
page->ss = ipage->ss;
|
||||
page->base = ipage->base;
|
||||
page->capacity = ipage->capacity;
|
||||
page->slab_idx = ipage->slab_idx;
|
||||
page->freelist = NULL;
|
||||
page->used = 0;
|
||||
|
||||
const uint16_t stride = hc->stride ? hc->stride : (uint16_t)tiny_stride_for_class(class_idx);
|
||||
if (page->freelist == NULL && page->base && page->capacity > page->used) {
|
||||
tiny_hotheap_v2_build_freelist(page, class_idx, stride);
|
||||
} else if (page->lease_page && page->lease_page->meta) {
|
||||
atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
|
||||
}
|
||||
tiny_hotheap_v2_build_freelist(page, class_idx, stride);
|
||||
|
||||
tiny_hotheap_page_v2* old_cur = hc->current_page;
|
||||
hc->current_page = page;
|
||||
page->next = NULL;
|
||||
if (old_cur && old_cur != page) {
|
||||
old_cur->next = hc->partial_pages;
|
||||
hc->partial_pages = old_cur;
|
||||
tiny_hotheap_v2_partial_push(hc, old_cur, class_idx, stats_on);
|
||||
}
|
||||
tiny_hotheap_v2_maybe_trim_partial(ctx, hc, class_idx, stats_on);
|
||||
if (!hc->current_page || !hc->current_page->freelist || hc->current_page->capacity == 0 ||
|
||||
hc->current_page->used > hc->current_page->capacity) {
|
||||
fprintf(stderr, "[HOTHEAP_V2_REFILL_ASSERT] current_page missing freelist (page=%p freelist=%p cap=%u used=%u)\n",
|
||||
@ -361,7 +508,7 @@ static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ct
|
||||
hc->current_page ? hc->current_page->freelist : NULL,
|
||||
hc->current_page ? (unsigned)hc->current_page->capacity : 0u,
|
||||
hc->current_page ? (unsigned)hc->current_page->used : 0u);
|
||||
abort();
|
||||
return NULL;
|
||||
}
|
||||
return hc->current_page;
|
||||
}
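// Shape of the cold boundary inferred from the refill_page/retire_page call sites above;
// the field names here are assumptions, the authoritative definition lives in
// box/tiny_cold_iface_v1.h.
typedef struct TinyColdIfaceSketch {
    tiny_heap_page_t* (*refill_page)(tiny_heap_ctx_t* cold_ctx, uint8_t class_idx);
    void              (*retire_page)(tiny_heap_ctx_t* cold_ctx, uint8_t class_idx,
                                     tiny_heap_page_t* page);
} TinyColdIfaceSketch;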
|
||||
@ -370,17 +517,26 @@ static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
|
||||
uint8_t class_idx,
|
||||
tiny_hotheap_page_v2* page) {
|
||||
if (!ctx || !page) return;
|
||||
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
|
||||
tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
|
||||
tiny_hotheap_v2_unlink_page(hc, page);
|
||||
TinyHeapPageLease lease = tiny_heap_page_lease_nil();
|
||||
lease.page = page->lease_page;
|
||||
lease.meta = page->meta;
|
||||
lease.ss = page->ss;
|
||||
lease.base = page->base;
|
||||
lease.capacity = page->capacity;
|
||||
lease.slab_idx = page->slab_idx;
|
||||
lease.freelist = page->freelist;
|
||||
tiny_heap_c7_return_page_from_v2(&lease);
|
||||
if (page->lease_page) {
|
||||
page->lease_page->used = page->used;
|
||||
page->lease_page->free_list = page->freelist;
|
||||
if (page->lease_page->meta) {
|
||||
atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
|
||||
atomic_store_explicit(&page->lease_page->meta->used, page->used, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
TinyColdIface cold = tiny_cold_iface_v1();
|
||||
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
|
||||
if (cold.retire_page) {
|
||||
cold.retire_page(cold_ctx, class_idx, page->lease_page);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_retire_calls[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
if (tiny_hotheap_v2_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_retire_calls_v2[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
if (page != &hc->storage_page) {
|
||||
free(page);
|
||||
} else {
|
||||
@ -394,38 +550,42 @@ static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
|
||||
}
|
||||
}
|
||||
if (tiny_hotheap_v2_stats_enabled()) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.page_retired, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].page_retired, 1, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_page_v2* candidate,
|
||||
tiny_heap_class_t* v1hcls,
|
||||
static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_class_v2* hc,
|
||||
tiny_hotheap_page_v2* page,
|
||||
uint8_t class_idx,
|
||||
TinyHeapClassStats* stats,
|
||||
int stats_on) {
|
||||
if (!candidate || !candidate->lease_page || !v1hcls) {
|
||||
if (!hc || !page || !page->base || page->capacity == 0) {
|
||||
return NULL;
|
||||
}
|
||||
tiny_heap_page_t* ipage = candidate->lease_page;
|
||||
v1hcls->current_page = ipage; // keep v1 hot page pinned to avoid mark_full churn
|
||||
if (!(ipage->free_list || ipage->used < ipage->capacity)) {
|
||||
if (hc->stride == 0) {
|
||||
hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
|
||||
}
|
||||
const uint16_t stride = hc->stride;
|
||||
void* block = NULL;
|
||||
if (page->freelist) {
|
||||
block = page->freelist;
|
||||
void* next = tiny_next_read(class_idx, block);
|
||||
page->freelist = next;
|
||||
} else if (page->used < page->capacity) {
|
||||
block = (void*)((uint8_t*)page->base + ((size_t)page->used * stride));
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
void* user = tiny_heap_page_pop(v1hcls, 7, ipage);
|
||||
if (!user) {
|
||||
return NULL;
|
||||
}
|
||||
if (ipage->used >= ipage->capacity && ipage->free_list == NULL) {
|
||||
tiny_heap_page_mark_full(v1hcls, ipage);
|
||||
}
|
||||
page->used++;
|
||||
if (__builtin_expect(stats != NULL, 0)) {
|
||||
atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
|
||||
}
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fast, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fast[tiny_hotheap_v2_idx(class_idx)],
|
||||
1,
|
||||
memory_order_relaxed);
|
||||
}
|
||||
candidate->freelist = ipage->free_list;
|
||||
candidate->used = ipage->used;
|
||||
return tiny_region_id_write_header(user, 7);
|
||||
return tiny_region_id_write_header(block, class_idx);
|
||||
}
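// Worked example of the bump-pointer branch above, with a hypothetical 128-byte stride:
//   base = 0x7f0000010000, used = 3  ->  block = base + 3 * 128 = 0x7f0000010180,
// and page->used advances to 4 once the pop succeeds.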
|
||||
|
||||
__attribute__((destructor))
|
||||
@ -433,35 +593,55 @@ static void tiny_hotheap_v2_stats_dump(void) {
|
||||
if (!tiny_hotheap_v2_stats_enabled()) {
|
||||
return;
|
||||
}
|
||||
uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_calls, memory_order_relaxed);
|
||||
uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_c7_route_hits, memory_order_relaxed);
|
||||
uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fast, memory_order_relaxed);
|
||||
uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_lease, memory_order_relaxed);
|
||||
uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, memory_order_relaxed);
|
||||
uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_calls, memory_order_relaxed);
|
||||
uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fast, memory_order_relaxed);
|
||||
uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, memory_order_relaxed);
|
||||
for (uint8_t ci = 0; ci < TINY_HOTHEAP_MAX_CLASSES; ci++) {
|
||||
uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
|
||||
uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
|
||||
uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
|
||||
uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
|
||||
uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
|
||||
uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
|
||||
uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
|
||||
uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
|
||||
uint64_t cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
|
||||
uint64_t cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
|
||||
uint64_t retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
|
||||
uint64_t partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
|
||||
uint64_t partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
|
||||
uint64_t partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
|
||||
uint64_t refill_with_cur = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
|
||||
uint64_t refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
|
||||
|
||||
TinyHotHeapV2PageStats ps = {
|
||||
.prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_calls, memory_order_relaxed),
|
||||
.prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_with_current_null, memory_order_relaxed),
|
||||
.prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_from_partial, memory_order_relaxed),
|
||||
.free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, memory_order_relaxed),
|
||||
.page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.page_retired, memory_order_relaxed),
|
||||
};
|
||||
TinyHotHeapV2PageStats ps = {
|
||||
.prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed),
|
||||
.prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed),
|
||||
.prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed),
|
||||
.free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed),
|
||||
.page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed),
|
||||
};
|
||||
|
||||
if (alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
|
||||
ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
|
||||
ps.free_made_current || ps.page_retired) {
|
||||
if (!(alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
|
||||
ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
|
||||
ps.free_made_current || ps.page_retired || retire_calls_v2 || partial_pushes || partial_pops || partial_peak)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
tiny_route_kind_t route_kind = tiny_route_for_class(ci);
|
||||
fprintf(stderr,
|
||||
"[HOTHEAP_V2_C7_STATS] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu alloc_fb_v1=%llu alloc_route_fb=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu\n",
|
||||
"[HOTHEAP_V2_STATS cls=%u route=%d] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu refill_cur=%llu refill_partial=%llu alloc_fb_v1=%llu alloc_route_fb=%llu cold_refill_fail=%llu cold_retire_calls=%llu retire_v2=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu partial_push=%llu partial_pop=%llu partial_peak=%llu\n",
|
||||
(unsigned)ci,
|
||||
(int)route_kind,
|
||||
(unsigned long long)route_hits,
|
||||
(unsigned long long)alloc_calls,
|
||||
(unsigned long long)alloc_fast,
|
||||
(unsigned long long)alloc_lease,
|
||||
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_refill, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed),
|
||||
(unsigned long long)refill_with_cur,
|
||||
(unsigned long long)refill_with_partial,
|
||||
(unsigned long long)alloc_fb,
|
||||
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed),
|
||||
(unsigned long long)cold_refill_fail,
|
||||
(unsigned long long)cold_retire_calls,
|
||||
(unsigned long long)retire_calls_v2,
|
||||
(unsigned long long)free_calls,
|
||||
(unsigned long long)free_fast,
|
||||
(unsigned long long)free_fb,
|
||||
@ -469,7 +649,10 @@ static void tiny_hotheap_v2_stats_dump(void) {
|
||||
(unsigned long long)ps.prepare_with_current_null,
|
||||
(unsigned long long)ps.prepare_from_partial,
|
||||
(unsigned long long)ps.free_made_current,
|
||||
(unsigned long long)ps.page_retired);
|
||||
(unsigned long long)ps.page_retired,
|
||||
(unsigned long long)partial_pushes,
|
||||
(unsigned long long)partial_pops,
|
||||
(unsigned long long)partial_peak);
|
||||
}
|
||||
}
|
||||
tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
|
||||
@ -484,6 +667,8 @@ tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
|
||||
for (int i = 0; i < TINY_HOTHEAP_MAX_CLASSES; i++) {
|
||||
tiny_hotheap_v2_page_reset(&ctx->cls[i].storage_page);
|
||||
ctx->cls[i].stride = (uint16_t)tiny_stride_for_class(i);
|
||||
ctx->cls[i].max_partial_pages = (i == 7 || i == 6) ? 2 : 0; // C6/C7 keep one or two partial pages on hand
|
||||
ctx->cls[i].partial_count = 0;
|
||||
}
|
||||
}
|
||||
return ctx;
|
||||
@ -491,143 +676,174 @@ tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
|
||||
|
||||
void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
|
||||
int stats_on = tiny_hotheap_v2_stats_enabled();
|
||||
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_route_hits, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_calls, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_route_hits[idx], 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_calls[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
if (__builtin_expect(class_idx != 7, 0)) {
|
||||
return NULL; // C7-only for now
|
||||
if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
|
||||
return NULL; // C6/C7 only for now
|
||||
}
|
||||
|
||||
tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
|
||||
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[7] : NULL;
|
||||
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
|
||||
tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
|
||||
tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
|
||||
tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
|
||||
TinyHeapClassStats* stats = tiny_heap_stats_for_class(7);
|
||||
TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
|
||||
|
||||
// If current_page looks corrupted, drop it and fall through to the slow path
|
||||
if (v2page && (!v2page->base || v2page->capacity == 0)) {
|
||||
vhcls->current_page = NULL;
|
||||
v2page = NULL;
|
||||
}
|
||||
|
||||
// Hot path: current_page → partial → refill
|
||||
void* user = tiny_hotheap_v2_try_pop(v2page, v1hcls, stats, stats_on);
|
||||
void* user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
|
||||
if (user) {
|
||||
return user;
|
||||
}
|
||||
|
||||
// move exhausted current_page to full list if needed
|
||||
if (vhcls && v2page && v2page->used >= v2page->capacity && vhcls->current_page == v2page) {
|
||||
vhcls->current_page = NULL;
|
||||
v2page->next = vhcls->full_pages;
|
||||
vhcls->full_pages = v2page;
|
||||
}
|
||||
|
||||
while (vhcls && vhcls->partial_pages) {
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.prepare_calls, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.prepare_from_partial, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_calls, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_from_partial, 1, memory_order_relaxed);
|
||||
if (vhcls->current_page == NULL) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.prepare_with_current_null, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_with_current_null, 1, memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
v2page = vhcls->partial_pages;
|
||||
vhcls->partial_pages = vhcls->partial_pages->next;
|
||||
if (vhcls->partial_count > 0) {
|
||||
vhcls->partial_count--;
|
||||
}
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
v2page->next = NULL;
|
||||
vhcls->current_page = v2page;
|
||||
user = tiny_hotheap_v2_try_pop(v2page, v1hcls, stats, stats_on);
|
||||
user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
|
||||
if (user) {
|
||||
return user;
|
||||
}
|
||||
if (v2page->used >= v2page->capacity) {
|
||||
v2page->next = vhcls->full_pages;
|
||||
vhcls->full_pages = v2page;
|
||||
vhcls->current_page = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Lease a page from v1 (C7 SAFE) and wrap it
|
||||
tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, 7);
|
||||
if (!leased || !v1hcls) {
|
||||
tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, class_idx);
|
||||
if (!leased) {
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
|
||||
return tiny_c7_alloc_fast(size); // safety fallback to v1
|
||||
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
|
||||
if (class_idx == 7) {
|
||||
return tiny_c7_alloc_fast(size); // safety fallback to v1
|
||||
}
|
||||
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
|
||||
return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
|
||||
}
|
||||
vhcls->current_page = leased;
|
||||
v2page = leased;
|
||||
if (v1hcls && v2page && v2page->lease_page) {
|
||||
v1hcls->current_page = v2page->lease_page;
|
||||
}
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_lease, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_lease[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
user = tiny_hotheap_v2_try_pop(v2page, v1hcls, stats, stats_on);
|
||||
user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
|
||||
if (user) {
|
||||
return user;
|
||||
}
|
||||
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
|
||||
return tiny_c7_alloc_fast(size);
|
||||
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
|
||||
if (class_idx == 7) {
|
||||
return tiny_c7_alloc_fast(size);
|
||||
}
|
||||
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
|
||||
return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
|
||||
}
|
||||
|
||||
void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
|
||||
if (__builtin_expect(class_idx != 7, 0)) {
|
||||
if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
|
||||
return;
|
||||
}
|
||||
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
|
||||
int stats_on = tiny_hotheap_v2_stats_enabled();
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_calls, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_calls[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
|
||||
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[7] : NULL;
|
||||
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
|
||||
TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;
|
||||
|
||||
tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
|
||||
tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
|
||||
|
||||
tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, 7, p, meta_ptr);
|
||||
if (page && page->lease_page && v1hcls && tiny_heap_ptr_in_page_range(page->lease_page, p)) {
|
||||
tiny_heap_page_free_local(v1ctx, 7, page->lease_page, p);
|
||||
page->freelist = page->lease_page->free_list;
|
||||
page->used = page->lease_page->used;
|
||||
if (v1hcls) {
|
||||
v1hcls->current_page = page->lease_page;
|
||||
tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, class_idx, p, meta_ptr);
|
||||
if (page && page->base && page->capacity > 0) {
|
||||
tiny_next_write(class_idx, p, page->freelist);
|
||||
page->freelist = p;
|
||||
if (page->used > 0) {
|
||||
page->used--;
|
||||
}
|
||||
if (vhcls && vhcls->current_page != page) {
|
||||
tiny_hotheap_v2_unlink_page(vhcls, page);
|
||||
page->next = vhcls->current_page;
|
||||
vhcls->current_page = page;
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, 1, memory_order_relaxed);
|
||||
}
|
||||
} else if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, 1, memory_order_relaxed);
|
||||
}
|
||||
// C7-only: keep the page hot even when empty to avoid churn
|
||||
if (vhcls) {
|
||||
if (!vhcls->current_page) {
|
||||
vhcls->current_page = page;
|
||||
} else if (vhcls->current_page != page) {
|
||||
tiny_hotheap_v2_unlink_page(vhcls, page);
|
||||
page->next = vhcls->current_page;
|
||||
vhcls->current_page = page;
|
||||
}
|
||||
}
|
||||
if (page->used == 0 && vhcls && vhcls->partial_pages != page && vhcls->current_page == page) {
|
||||
// park empty page in partial to allow re-use without immediate Superslab return
|
||||
page->next = vhcls->partial_pages;
|
||||
vhcls->partial_pages = page;
|
||||
vhcls->current_page = page; // still treat as current
|
||||
}
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fast, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].free_made_current, 1, memory_order_relaxed);
|
||||
}
|
||||
if (page->used == 0) {
|
||||
// Keep the now-empty page on the partial list for reuse; retire it once the cap is exceeded
|
||||
tiny_hotheap_v2_unlink_page(vhcls, page);
|
||||
page->next = NULL;
|
||||
if (vhcls && vhcls->current_page == NULL) {
|
||||
vhcls->current_page = page;
|
||||
} else if (vhcls) {
|
||||
tiny_hotheap_v2_partial_push(vhcls, page, class_idx, stats_on);
|
||||
tiny_hotheap_v2_maybe_trim_partial(v2ctx, vhcls, class_idx, stats_on);
|
||||
}
|
||||
} else if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
if (stats_on && page->used == 0) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback: mimic v1 free path
|
||||
if (stats_on) {
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[idx], 1, memory_order_relaxed);
|
||||
}
|
||||
SuperSlab* ss = hak_super_lookup(p);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
int slab_idx = slab_index_for(ss, p);
|
||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||
tiny_c7_free_fast_with_meta(ss, slab_idx, p);
|
||||
if (class_idx == 7) {
|
||||
tiny_c7_free_fast_with_meta(ss, slab_idx, p);
|
||||
} else {
|
||||
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
|
||||
tiny_heap_free_class_fast_with_meta(cold_ctx, class_idx, ss, slab_idx, p);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
tiny_c7_free_fast(p);
|
||||
if (class_idx == 7) {
|
||||
tiny_c7_free_fast(p);
|
||||
} else {
|
||||
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
|
||||
tiny_heap_free_class_fast(cold_ctx, class_idx, p);
|
||||
}
|
||||
}
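// Hedged caller sketch for the free side: resolve the owning slab's metadata first,
// then hand the block to the v2 heap. tiny_meta_for() is a placeholder; how the route
// layer actually obtains TinySlabMeta is not shown in this diff.
static void tiny_front_free_sketch(uint8_t class_idx, void* p) {
    TinySlabMeta* meta = NULL;
    SuperSlab* ss = hak_super_lookup(p);
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int slab_idx = slab_index_for(ss, p);
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
            meta = tiny_meta_for(ss, slab_idx);      // hypothetical per-slab meta accessor
        }
    }
    tiny_hotheap_v2_free(class_idx, p, meta);        // a NULL meta still reaches the fallback path
}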
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
|
||||
@ -9,6 +9,11 @@
|
||||
//
|
||||
// Cold path only - called once at startup.
|
||||
|
||||
// Some build configurations expect this hook but do not provide an implementation.
|
||||
// Provide a no-op stub so that non-debug builds continue to link without optional
|
||||
// signal-dump support.
|
||||
static inline void hak_tiny_enable_signal_dump(void) { }
|
||||
|
||||
void hak_tiny_init(void) {
|
||||
if (g_tiny_initialized) return;
|
||||
|
||||
|
||||
@ -9,6 +9,9 @@ typedef struct {
|
||||
uint16_t thread_id; // low bits of thread id (best-effort)
|
||||
} AllocEvent;
|
||||
|
||||
// Forward decl (defined in ss_os_acquire_box.h)
|
||||
extern int ss_os_madvise_guarded(void* ptr, size_t len, int advice, const char* where);
|
||||
|
||||
#define EVENTQ_CAP 65536u
|
||||
#define EVENTQ_MASK (EVENTQ_CAP - 1u)
|
||||
static _Atomic uint32_t g_ev_tail = 0;
|
||||
@ -689,7 +692,7 @@ static inline void superslab_partial_release(SuperSlab* ss, uint32_t epoch) {
|
||||
uint32_t prev = ss->partial_epoch;
|
||||
if (epoch != 0 && (epoch - prev) < g_ss_partial_interval) return;
|
||||
size_t len = (size_t)1 << ss->lg_size;
|
||||
if (madvise(ss, len, MADV_DONTNEED) == 0) {
|
||||
if (ss_os_madvise_guarded(ss, len, MADV_DONTNEED, "tiny_ss_partial") == 0) {
|
||||
ss->partial_epoch = epoch;
|
||||
}
|
||||
#else
|
||||
|
||||
325
core/smallobject_hotbox_v3.c
Normal file
@ -0,0 +1,325 @@
|
||||
// smallobject_hotbox_v3.c - SmallObject HotHeap v3 skeleton (C7-first)
|
||||
// Phase A/B: types and stats only. alloc/free fall back to v1.
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "box/smallobject_hotbox_v3_box.h"
|
||||
#include "box/smallobject_cold_iface_v1.h"
|
||||
#include "box/tiny_heap_box.h"
|
||||
#include "box/tiny_front_v3_env_box.h"
|
||||
#include "hakmem_tiny.h" // TINY_SLAB_SIZE mask for page_of
|
||||
#include "tiny_region_id.h"
|
||||
|
||||
static __thread so_ctx_v3* g_so_ctx_v3;
|
||||
static int g_so_stats_enabled = -1;
|
||||
static so_stats_class_v3 g_so_stats[SMALLOBJECT_NUM_CLASSES];
|
||||
|
||||
int so_v3_stats_enabled(void) {
|
||||
if (__builtin_expect(g_so_stats_enabled == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_SMALL_HEAP_V3_STATS");
|
||||
g_so_stats_enabled = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
return g_so_stats_enabled;
|
||||
}
|
||||
|
||||
static inline so_stats_class_v3* so_stats_for(uint8_t ci) {
|
||||
if (!so_v3_stats_enabled()) return NULL;
|
||||
if (ci >= SMALLOBJECT_NUM_CLASSES) return NULL;
|
||||
return &g_so_stats[ci];
|
||||
}
|
||||
|
||||
void so_v3_record_route_hit(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->route_hits, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_alloc_call(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->alloc_calls, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_alloc_refill(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->alloc_refill, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_alloc_fallback(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->alloc_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_free_call(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->free_calls, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void so_v3_record_free_fallback(uint8_t ci) {
|
||||
so_stats_class_v3* st = so_stats_for(ci);
|
||||
if (st) atomic_fetch_add_explicit(&st->free_fallback_v1, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
so_ctx_v3* so_tls_get(void) {
|
||||
so_ctx_v3* ctx = g_so_ctx_v3;
|
||||
if (__builtin_expect(ctx == NULL, 0)) {
|
||||
ctx = (so_ctx_v3*)calloc(1, sizeof(so_ctx_v3));
|
||||
if (!ctx) {
|
||||
fprintf(stderr, "[SMALL_HEAP_V3] TLS alloc failed\n");
|
||||
abort();
|
||||
}
|
||||
for (int i = 0; i < SMALLOBJECT_NUM_CLASSES; i++) {
|
||||
so_class_v3* hc = &ctx->cls[i];
|
||||
hc->block_size = (uint32_t)tiny_stride_for_class(i);
|
||||
hc->max_partial_pages = 2;
|
||||
}
|
||||
g_so_ctx_v3 = ctx;
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static inline void* so_build_freelist(so_page_v3* page) {
|
||||
if (!page || !page->base || page->block_size == 0 || page->capacity == 0) return NULL;
|
||||
uint8_t* base = (uint8_t*)page->base;
|
||||
void* head = NULL;
|
||||
for (uint32_t i = 0; i < page->capacity; i++) {
|
||||
uint8_t* blk = base + ((size_t)i * page->block_size);
|
||||
*(void**)blk = head;
|
||||
head = blk;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static inline int so_ptr_in_page(so_page_v3* page, void* ptr) {
|
||||
if (!page || !ptr) return 0;
|
||||
uintptr_t base = (uintptr_t)page->base;
|
||||
uintptr_t p = (uintptr_t)ptr;
|
||||
uintptr_t span = (uintptr_t)page->block_size * (uintptr_t)page->capacity;
|
||||
if (p < base || p >= base + span) return 0;
|
||||
if (((p - base) % page->block_size) != 0) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline so_page_v3* so_page_of(so_class_v3* hc, void* ptr) {
|
||||
if (!ptr || !hc) return NULL;
|
||||
so_page_v3* page = hc->current;
|
||||
if (page && so_ptr_in_page(page, ptr)) {
|
||||
return page;
|
||||
}
|
||||
page = hc->partial;
|
||||
while (page) {
|
||||
if (so_ptr_in_page(page, ptr)) {
|
||||
return page;
|
||||
}
|
||||
page = page->next;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void so_page_push_partial(so_class_v3* hc, so_page_v3* page) {
    if (!hc || !page) return;
    page->next = hc->partial;
    hc->partial = page;
    hc->partial_count++;
}

static inline void so_page_retire_slow(so_ctx_v3* ctx, uint32_t ci, so_page_v3* page);
static inline void* so_alloc_fast(so_ctx_v3* ctx, uint32_t ci) {
    so_class_v3* hc = &ctx->cls[ci];
    const bool skip_header_c7 = (ci == 7) && tiny_header_v3_enabled() && tiny_header_v3_skip_c7();
    so_page_v3* p = hc->current;
    if (p && p->freelist && p->used < p->capacity) {
        void* blk = p->freelist;
        p->freelist = *(void**)blk;
        p->used++;
        if (skip_header_c7) {
            uint8_t* header_ptr = (uint8_t*)blk;
            *header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
            return header_ptr + 1; // mirror tiny_region_id_write_header fast path
        }
        return tiny_region_id_write_header(blk, (int)ci);
    }

    if (hc->partial) {
        so_page_v3* old_cur = hc->current;
        p = hc->partial;
        hc->partial = p->next;
        if (hc->partial_count > 0) {
            hc->partial_count--;
        }
        p->next = NULL;
        hc->current = p;
        if (old_cur && old_cur != p) {
            if (hc->partial_count < hc->max_partial_pages) {
                so_page_push_partial(hc, old_cur);
            } else {
                so_page_retire_slow(ctx, ci, old_cur);
            }
        }
        if (p->freelist && p->used < p->capacity) {
            void* blk = p->freelist;
            p->freelist = *(void**)blk;
            p->used++;
            if (skip_header_c7) {
                uint8_t* header_ptr = (uint8_t*)blk;
                *header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
                return header_ptr + 1;
            }
            return tiny_region_id_write_header(blk, (int)ci);
        }
    }
    return NULL;
}
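// Fast-path shape: (1) pop from the current page's freelist; (2) otherwise promote a partial
// page to current, recycling or retiring the displaced current page, and pop from it;
// (3) otherwise return NULL and let the caller take the refill slow path.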
static inline int so_unlink_partial(so_class_v3* hc, so_page_v3* target) {
    if (!hc || !target) return 0;
    so_page_v3* prev = NULL;
    so_page_v3* cur = hc->partial;
    while (cur) {
        if (cur == target) {
            if (prev) {
                prev->next = cur->next;
            } else {
                hc->partial = cur->next;
            }
            if (hc->partial_count > 0) {
                hc->partial_count--;
            }
            return 1;
        }
        prev = cur;
        cur = cur->next;
    }
    return 0;
}
static inline void so_page_retire_slow(so_ctx_v3* ctx, uint32_t ci, so_page_v3* page) {
    SmallObjectColdIface cold = smallobject_cold_iface_v1();
    void* cold_ctx = (void*)tiny_heap_ctx_for_thread();
    if (cold.retire_page) {
        cold.retire_page(cold_ctx, ci, page);
    } else {
        free(page);
    }
    (void)ctx;
}
static inline void so_free_fast(so_ctx_v3* ctx, uint32_t ci, void* ptr) {
    so_class_v3* hc = &ctx->cls[ci];
    so_page_v3* page = so_page_of(hc, ptr);
    if (!page) {
        so_v3_record_free_fallback((uint8_t)ci);
        tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), (int)ci, ptr);
        return;
    }

    *(void**)ptr = page->freelist;
    page->freelist = ptr;
    if (page->used > 0) {
        page->used--;
    }

    if (page->used == 0) {
        (void)so_unlink_partial(hc, page);
        if (hc->partial_count < hc->max_partial_pages) {
            so_page_push_partial(hc, page);
            if (!hc->current) {
                hc->current = page;
            }
        } else {
            if (hc->current == page) {
                hc->current = NULL;
            }
            so_page_retire_slow(ctx, ci, page);
        }
    } else if (!hc->current) {
        hc->current = page;
    }
}
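// Free-path policy: a block found in one of this class's pages returns to that page; anything
// else falls back to the legacy v1 free path. When a page drains to used == 0 it is kept
// around (as current and/or partial) while partial_count stays below max_partial_pages;
// otherwise it is retired through the cold interface.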
static inline so_page_v3* so_alloc_refill_slow(so_ctx_v3* ctx, uint32_t ci) {
    SmallObjectColdIface cold = smallobject_cold_iface_v1();
    void* cold_ctx = (void*)tiny_heap_ctx_for_thread();
    if (!cold.refill_page) return NULL;
    so_page_v3* page = cold.refill_page(cold_ctx, ci);
    if (!page) return NULL;

    if (page->block_size == 0) {
        page->block_size = (uint32_t)tiny_stride_for_class((int)ci);
    }
    page->class_idx = ci;

    page->used = 0;
    page->freelist = so_build_freelist(page);
    if (!page->freelist) {
        if (cold.retire_page) {
            cold.retire_page(cold_ctx, ci, page);
        } else {
            free(page);
        }
        return NULL;
    }
    page->next = NULL;

    so_class_v3* hc = &ctx->cls[ci];
    if (hc->current) {
        if (hc->partial_count < hc->max_partial_pages) {
            so_page_push_partial(hc, hc->current);
        } else {
            so_page_retire_slow(ctx, ci, hc->current);
        }
    }
    hc->current = page;
    return page;
}
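// Cold-interface contract as used above: refill_page(cold_ctx, ci) is expected to return a
// so_page_v3 with base and capacity populated (block_size optional); class_idx, used and the
// freelist are normalized here before the page is installed as current.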
void* so_alloc(uint32_t class_idx) {
    if (__builtin_expect(class_idx >= SMALLOBJECT_NUM_CLASSES, 0)) {
        return NULL;
    }
    so_v3_record_route_hit((uint8_t)class_idx);
    so_v3_record_alloc_call((uint8_t)class_idx);

    so_ctx_v3* ctx = so_tls_get();
    void* blk = so_alloc_fast(ctx, class_idx);
    if (blk) return blk;

    so_page_v3* page = so_alloc_refill_slow(ctx, class_idx);
    if (!page) {
        so_v3_record_alloc_fallback((uint8_t)class_idx);
        return NULL;
    }
    so_v3_record_alloc_refill((uint8_t)class_idx);
    blk = so_alloc_fast(ctx, class_idx);
    if (!blk) {
        so_v3_record_alloc_fallback((uint8_t)class_idx);
    }
    return blk;
}
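// Counter semantics: every so_alloc() bumps route_hits and alloc_calls; alloc_refill counts
// slow-path refills that produced a page; alloc_fallback_v1 counts the cases where the caller
// must fall back to the legacy path (no page, or a refilled page that still yielded no block).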
void so_free(uint32_t class_idx, void* ptr) {
    if (__builtin_expect(class_idx >= SMALLOBJECT_NUM_CLASSES, 0)) {
        return;
    }
    so_v3_record_free_call((uint8_t)class_idx);
    so_ctx_v3* ctx = so_tls_get();
    so_free_fast(ctx, class_idx, ptr);
}
__attribute__((destructor))
static void so_v3_stats_dump(void) {
    if (!so_v3_stats_enabled()) return;
    for (int i = 0; i < SMALLOBJECT_NUM_CLASSES; i++) {
        so_stats_class_v3* st = &g_so_stats[i];
        uint64_t rh = atomic_load_explicit(&st->route_hits, memory_order_relaxed);
        uint64_t ac = atomic_load_explicit(&st->alloc_calls, memory_order_relaxed);
        uint64_t ar = atomic_load_explicit(&st->alloc_refill, memory_order_relaxed);
        uint64_t afb = atomic_load_explicit(&st->alloc_fallback_v1, memory_order_relaxed);
        uint64_t fc = atomic_load_explicit(&st->free_calls, memory_order_relaxed);
        uint64_t ffb = atomic_load_explicit(&st->free_fallback_v1, memory_order_relaxed);
        if (rh + ac + afb + fc + ffb + ar == 0) continue;
        fprintf(stderr,
                "[SMALL_HEAP_V3_STATS] cls=%d route_hits=%llu alloc_calls=%llu alloc_refill=%llu alloc_fb_v1=%llu free_calls=%llu free_fb_v1=%llu\n",
                i, (unsigned long long)rh, (unsigned long long)ac,
                (unsigned long long)ar, (unsigned long long)afb, (unsigned long long)fc, (unsigned long long)ffb);
    }
}
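// Example dump line (values are illustrative):
//   [SMALL_HEAP_V3_STATS] cls=7 route_hits=1048576 alloc_calls=1048576 alloc_refill=2048 alloc_fb_v1=0 free_calls=1048576 free_fb_v1=12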
@ -4,6 +4,7 @@
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"
#include "hakmem_env_cache.h"
#include "box/ss_os_acquire_box.h"

// ============================================================================
@ -116,9 +117,12 @@ void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int p
    // This is critical: we MUST touch the pages after munmap() to establish valid mappings
    // CRITICAL FIX (2025-12-05): Use MADV_POPULATE_WRITE for efficiency
#ifdef MADV_POPULATE_WRITE
    // Previously: int ret = madvise(ptr, ss_size, MADV_POPULATE_WRITE);
    ss_os_stats_record_madvise();
    int ret = ss_os_madvise_guarded(ptr, ss_size, MADV_POPULATE_WRITE, "ss_cache_populate");
    if (ret != 0) {
        if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
            fprintf(stderr, "[SS_CACHE] madvise(MADV_POPULATE_WRITE) EINVAL (strict). Aborting.\n");
            abort();
        }
        // Fallback: explicit memset
        memset(ptr, 0, ss_size);
    }
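// Minimal sketch of what ss_os_madvise_guarded() is assumed to do, based on the counters and
// the g_ss_madvise_disabled flag introduced below; the real implementation lives in
// box/ss_os_acquire_box.h and may differ:
//
//   static inline int ss_os_madvise_guarded(void* p, size_t len, int advice, const char* tag) {
//       if (atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed)) return -1;
//       int rc = madvise(p, len, advice);
//       if (rc != 0) {
//           if (errno == ENOMEM) {
//               atomic_fetch_add_explicit(&g_ss_os_madvise_fail_enomem, 1, memory_order_relaxed);
//               atomic_store_explicit(&g_ss_madvise_disabled, true, memory_order_relaxed);
//           } else {
//               atomic_fetch_add_explicit(&g_ss_os_madvise_fail_other, 1, memory_order_relaxed);
//           }
//           (void)tag; // tag would typically feed a log/telemetry line
//       }
//       return rc;
//   }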
@ -4,6 +4,8 @@
// Date: 2025-11-28

#include "hakmem_tiny_superslab_internal.h"
#include "box/ss_os_acquire_box.h"
#include <stdbool.h>
#include <stdlib.h>

// ============================================================================
@ -33,8 +35,11 @@ _Atomic uint64_t g_final_fallback_mmap_count = 0;
_Atomic uint64_t g_ss_os_alloc_calls = 0;
_Atomic uint64_t g_ss_os_free_calls = 0;
_Atomic uint64_t g_ss_os_madvise_calls = 0;
_Atomic uint64_t g_ss_os_madvise_fail_enomem = 0;
_Atomic uint64_t g_ss_os_madvise_fail_other = 0;
_Atomic uint64_t g_ss_os_huge_alloc_calls = 0;
_Atomic uint64_t g_ss_os_huge_fail_calls = 0;
_Atomic bool g_ss_madvise_disabled = false;

// Superslab/slab observability (Tiny-only; relaxed updates)
_Atomic uint64_t g_ss_live_by_class[8] = {0};
@ -224,10 +229,14 @@ static void ss_os_stats_dump(void) {
        return;
    }
    fprintf(stderr,
            // Previously: "[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
            "[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu madvise_enomem=%llu madvise_other=%llu madvise_disabled=%d "
            "mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
            (unsigned long long)atomic_load_explicit(&g_ss_os_alloc_calls, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_ss_os_free_calls, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_ss_os_madvise_calls, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_enomem, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_other, memory_order_relaxed),
            atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed) ? 1 : 0,
            (unsigned long long)atomic_load_explicit(&g_ss_mmap_count, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_final_fallback_mmap_count, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_ss_os_huge_alloc_calls, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_ss_os_huge_fail_calls, memory_order_relaxed));
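// Example dump line with the new failure fields (values are illustrative):
//   [SS_OS_STATS] alloc=512 free=480 madvise=512 madvise_enomem=3 madvise_other=0 madvise_disabled=1 mmap_total=512 fallback_mmap=0 huge_alloc=16 huge_fail=0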