Boxify superslab registry, add bench profile, and document C7 hotpath experiments

This commit is contained in:
Moe Charm (CI)
2025-12-07 03:12:27 +09:00
parent 18faa6a1c4
commit fda6cd2e67
71 changed files with 2052 additions and 286 deletions

View File

@ -0,0 +1,15 @@
// c7_hotpath_env_box.h - ENV gate for the C7 hotpath.
// Purpose: isolate environment-variable handling so hotpath code can assume
// the gate has already been evaluated.
#pragma once

#include <stdlib.h>

// ENV gate: set HAKMEM_TINY_C7_HOT=1 to enable (default: OFF).
// The decision is computed once on first call and cached for the
// lifetime of the process.
static inline int tiny_c7_hot_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached < 0, 0)) {
        const char* v = getenv("HAKMEM_TINY_C7_HOT");
        cached = (v != NULL && v[0] != '\0' && v[0] != '0') ? 1 : 0;
    }
    return cached;
}

View File

@ -0,0 +1,8 @@
// c7_meta_used_counter_box.c
// Definitions for the C7 meta->used increment counters (shared by Release
// and Debug builds). Declarations live in the matching header.
#include "c7_meta_used_counter_box.h"

// Total meta->used increments observed for class C7.
_Atomic uint64_t g_c7_meta_used_inc_total = 0;
// Breakdown by path, per the counter suffixes (backend / TLS / front).
_Atomic uint64_t g_c7_meta_used_inc_backend = 0;
_Atomic uint64_t g_c7_meta_used_inc_tls = 0;
_Atomic uint64_t g_c7_meta_used_inc_front = 0;

View File

@ -17,8 +17,9 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \
core/box/../tiny_region_id.h core/box/../tiny_box_geometry.h \
core/box/../ptr_track.h core/box/../hakmem_super_registry.h \
core/box/../box/ss_addr_map_box.h \
core/box/../box/../hakmem_build_flags.h core/box/../tiny_debug_api.h \
core/box/carve_push_box.h core/box/capacity_box.h core/box/tls_sll_box.h \
core/box/../box/../hakmem_build_flags.h core/box/../box/super_reg_box.h \
core/box/../tiny_debug_api.h core/box/carve_push_box.h \
core/box/capacity_box.h core/box/tls_sll_box.h \
core/box/../hakmem_internal.h core/box/../hakmem.h \
core/box/../hakmem_config.h core/box/../hakmem_features.h \
core/box/../hakmem_sys.h core/box/../hakmem_whale.h \
@ -70,6 +71,7 @@ core/box/../ptr_track.h:
core/box/../hakmem_super_registry.h:
core/box/../box/ss_addr_map_box.h:
core/box/../box/../hakmem_build_flags.h:
core/box/../box/super_reg_box.h:
core/box/../tiny_debug_api.h:
core/box/carve_push_box.h:
core/box/capacity_box.h:

View File

@ -11,20 +11,21 @@ core/box/front_gate_box.o: core/box/front_gate_box.c \
core/hakmem_tiny_superslab_constants.h core/superslab/superslab_inline.h \
core/superslab/superslab_types.h core/superslab/../tiny_box_geometry.h \
core/tiny_debug_ring.h core/tiny_remote.h core/box/ss_addr_map_box.h \
core/box/../hakmem_build_flags.h core/tiny_debug_api.h \
core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \
core/box/tiny_header_box.h core/box/tiny_layout_box.h \
core/box/../tiny_region_id.h core/box/tls_sll_box.h \
core/box/../hakmem_internal.h core/box/../hakmem.h \
core/box/../hakmem_build_flags.h core/box/../hakmem_config.h \
core/box/../hakmem_features.h core/box/../hakmem_sys.h \
core/box/../hakmem_whale.h core/box/../box/ptr_type_box.h \
core/box/../hakmem_debug_master.h core/box/../tiny_remote.h \
core/box/../hakmem_tiny_integrity.h core/box/../hakmem_tiny.h \
core/box/../ptr_track.h core/box/../ptr_trace.h \
core/box/../hakmem_trace_master.h core/box/../hakmem_stats_master.h \
core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \
core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \
core/box/../hakmem_build_flags.h core/box/super_reg_box.h \
core/tiny_debug_api.h core/box/tiny_layout_box.h \
core/box/../hakmem_tiny_config.h core/box/tiny_header_box.h \
core/box/tiny_layout_box.h core/box/../tiny_region_id.h \
core/box/tls_sll_box.h core/box/../hakmem_internal.h \
core/box/../hakmem.h core/box/../hakmem_build_flags.h \
core/box/../hakmem_config.h core/box/../hakmem_features.h \
core/box/../hakmem_sys.h core/box/../hakmem_whale.h \
core/box/../box/ptr_type_box.h core/box/../hakmem_debug_master.h \
core/box/../tiny_remote.h core/box/../hakmem_tiny_integrity.h \
core/box/../hakmem_tiny.h core/box/../ptr_track.h \
core/box/../ptr_trace.h core/box/../hakmem_trace_master.h \
core/box/../hakmem_stats_master.h core/box/../tiny_debug_ring.h \
core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \
core/box/tiny_ptr_bridge_box.h \
core/box/../hakmem_tiny_superslab_internal.h \
core/box/../hakmem_tiny_superslab.h core/box/../box/ss_hot_cold_box.h \
core/box/../box/../superslab/superslab_types.h \
@ -63,6 +64,7 @@ core/tiny_debug_ring.h:
core/tiny_remote.h:
core/box/ss_addr_map_box.h:
core/box/../hakmem_build_flags.h:
core/box/super_reg_box.h:
core/tiny_debug_api.h:
core/box/tiny_layout_box.h:
core/box/../hakmem_tiny_config.h:

View File

@ -11,8 +11,9 @@ core/box/front_gate_classifier.o: core/box/front_gate_classifier.c \
core/box/../superslab/../tiny_box_geometry.h \
core/box/../tiny_debug_ring.h core/box/../tiny_remote.h \
core/box/../box/ss_addr_map_box.h \
core/box/../box/../hakmem_build_flags.h core/box/../hakmem_tiny.h \
core/box/../hakmem_trace.h core/box/../hakmem_tiny_mini_mag.h \
core/box/../box/../hakmem_build_flags.h core/box/../box/super_reg_box.h \
core/box/../hakmem_tiny.h core/box/../hakmem_trace.h \
core/box/../hakmem_tiny_mini_mag.h \
core/box/../box/hak_lane_classify.inc.h core/box/../box/ptr_type_box.h \
core/box/../tiny_debug_api.h core/box/../hakmem_tiny_superslab.h \
core/box/../superslab/superslab_inline.h \
@ -38,6 +39,7 @@ core/box/../tiny_debug_ring.h:
core/box/../tiny_remote.h:
core/box/../box/ss_addr_map_box.h:
core/box/../box/../hakmem_build_flags.h:
core/box/../box/super_reg_box.h:
core/box/../hakmem_tiny.h:
core/box/../hakmem_trace.h:
core/box/../hakmem_tiny_mini_mag.h:

View File

@ -0,0 +1,88 @@
#include "remote_side_box.h"
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef REM_SIDE_LOG2
#define REM_SIDE_LOG2 20
#endif
static _Atomic uint32_t g_remote_log2 = REM_SIDE_LOG2;
static _Atomic uint32_t g_remote_size = (1u << REM_SIDE_LOG2);
static _Atomic uint32_t g_remote_mask = (1u << REM_SIDE_LOG2) - 1;
static _Atomic int g_remote_profile_inited = 0;
static rem_side_entry* g_remote_slots = NULL;
static _Atomic int g_remote_allocated = 0;
// Decide the effective REM_SIDE table geometry once, from the given profile
// string (or the HAKMEM_PROFILE environment variable when NULL).
// The "bench" profile logically shrinks the hash width to 1/8 of the
// compile-time default, but never below 2^12 (4096) entries.
static void remote_side_apply_profile(const char* profile) {
    if (g_remote_profile_inited) {
        return;
    }
    const char* name = (profile != NULL) ? profile : getenv("HAKMEM_PROFILE");
    uint32_t width = REM_SIDE_LOG2;
    if (name != NULL && strcmp(name, "bench") == 0 && REM_SIDE_LOG2 > 4) {
        width = REM_SIDE_LOG2 - 3; // bench: shrink hash width to 1/8
        if (width < 12) {
            width = 12; // keep at least 4096 entries
        }
    }
    uint32_t entries = (1u << width);
    atomic_store_explicit(&g_remote_log2, width, memory_order_relaxed);
    atomic_store_explicit(&g_remote_size, entries, memory_order_relaxed);
    atomic_store_explicit(&g_remote_mask, entries - 1u, memory_order_relaxed);
    atomic_store_explicit(&g_remote_profile_inited, 1, memory_order_release);
}
// Allocate the global REM_SIDE slot table according to the effective profile.
// `box` is currently unused (placeholder for a future instance handle);
// `profile` overrides HAKMEM_PROFILE when non-NULL.
// Aborts the process when the table cannot be allocated.
// NOTE(review): if two threads race here before g_remote_allocated is
// published, both may calloc a table — one allocation leaks and
// g_remote_slots can be overwritten. Confirm first-time init is serialized.
void remote_side_init(RemoteSideBox* box, const char* profile) {
    (void)box;
    remote_side_apply_profile(profile);
    if (atomic_load_explicit(&g_remote_allocated, memory_order_acquire)) {
        return;
    }
    uint32_t size = remote_side_effective_size();
    g_remote_slots = (rem_side_entry*)calloc(size, sizeof(rem_side_entry));
    if (!g_remote_slots) {
        fprintf(stderr, "[REMOTE_SIDE] failed to allocate %zu bytes\n",
                (size_t)size * sizeof(rem_side_entry));
        abort();
    }
    atomic_store_explicit(&g_remote_allocated, 1, memory_order_release);
}
// Effective log2 of the REM_SIDE table width. Lazily applies the profile on
// first use so callers need not call remote_side_init() explicitly.
uint32_t remote_side_effective_log2(void) {
    if (!atomic_load_explicit(&g_remote_profile_inited, memory_order_acquire)) {
        remote_side_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_remote_log2, memory_order_relaxed);
}

// Effective entry count (always a power of two; see apply_profile).
uint32_t remote_side_effective_size(void) {
    if (!atomic_load_explicit(&g_remote_profile_inited, memory_order_acquire)) {
        remote_side_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_remote_size, memory_order_relaxed);
}

// Index mask (size - 1) for hashing into the table.
uint32_t remote_side_effective_mask(void) {
    if (!atomic_load_explicit(&g_remote_profile_inited, memory_order_acquire)) {
        remote_side_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_remote_mask, memory_order_relaxed);
}

// Pointer to the slot array, allocating it on first use.
// NOTE(review): concurrent first callers can race into remote_side_init();
// confirm first touch is single-threaded or serialized.
rem_side_entry* remote_side_table(void) {
    if (!atomic_load_explicit(&g_remote_allocated, memory_order_acquire)) {
        remote_side_init(NULL, NULL);
    }
    return g_remote_slots;
}

View File

@ -0,0 +1,21 @@
#pragma once
// RemoteSideBox: thin wrapper that lets profiles logically shrink the
// REM_SIDE table used by tiny_remote.
#include <stdint.h>
#include <stdatomic.h>

// One hash-table slot: (node pointer, next pointer) pair.
typedef struct rem_side_entry {
    _Atomic(uintptr_t) key; // node pointer
    _Atomic(uintptr_t) val; // next pointer
} rem_side_entry;

typedef struct RemoteSideBox RemoteSideBox;

// When profile is NULL, the HAKMEM_PROFILE environment variable is read.
void remote_side_init(RemoteSideBox* box, const char* profile);

// Effective size/mask (the array itself stays at REM_SIDE_SIZE).
uint32_t remote_side_effective_size(void);
uint32_t remote_side_effective_mask(void);
uint32_t remote_side_effective_log2(void);

// Lazily-allocated table storage.
rem_side_entry* remote_side_table(void);

View File

@ -0,0 +1,50 @@
#include "shared_pool_box.h"
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
// 既存の g_shared_pool 配列上に「論理的な上限」だけを被せる。
static _Atomic uint32_t g_sp_total_limit = 0; // 0 = 無制限(現行のまま)
static _Atomic uint32_t g_sp_class_limit = 0; // 0 = 無制限
static _Atomic int g_sp_profile_inited = 0;
static void shared_pool_apply_profile(const char* profile) {
if (g_sp_profile_inited) {
return;
}
const char* env_profile = profile ? profile : getenv("HAKMEM_PROFILE");
int is_bench = (env_profile && strcmp(env_profile, "bench") == 0);
uint32_t total_limit = 0;
uint32_t class_limit = 0;
if (is_bench) {
// bench 用: ひとまず控えめな論理上限だけ入れる
total_limit = 65536; // 元の 1M よりかなり少ない
class_limit = 2048; // クラスあたりの active slot 上限の目安
}
atomic_store_explicit(&g_sp_total_limit, total_limit, memory_order_relaxed);
atomic_store_explicit(&g_sp_class_limit, class_limit, memory_order_relaxed);
atomic_store_explicit(&g_sp_profile_inited, 1, memory_order_release);
}
// Initialize the Shared Pool limit box. `box` is a placeholder handle;
// `profile` overrides HAKMEM_PROFILE when non-NULL.
void shared_pool_box_init(SharedPoolBox* box, const char* profile) {
    (void)box;
    shared_pool_apply_profile(profile);
}

// Logical cap on total Shared Pool slots (0 = unlimited). Lazily applies
// the profile on first use.
uint32_t shared_pool_effective_total_slots(void) {
    if (!atomic_load_explicit(&g_sp_profile_inited, memory_order_acquire)) {
        shared_pool_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_sp_class_limit == g_sp_class_limit ? &g_sp_total_limit : &g_sp_total_limit, memory_order_relaxed);
}

// Logical per-class cap (0 = unlimited). `class_idx` is currently ignored:
// one shared limit applies to every class.
uint32_t shared_pool_effective_class_slots(int class_idx) {
    (void)class_idx;
    if (!atomic_load_explicit(&g_sp_profile_inited, memory_order_acquire)) {
        shared_pool_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_sp_class_limit, memory_order_relaxed);
}

View File

@ -0,0 +1,18 @@
#pragma once
// SharedPoolBox: lightweight wrapper that overlays a "logical limit" on the
// existing g_shared_pool.
// Goals:
// - Logically curb Shared Pool growth under HAKMEM_PROFILE=bench and similar.
// - Keep the array size itself unchanged (BSS is not shrunk yet).
#include <stdint.h>

typedef struct SharedPoolBox SharedPoolBox;

// When profile is NULL, HAKMEM_PROFILE is read instead.
void shared_pool_box_init(SharedPoolBox* box, const char* profile);

// Total budget beyond which no growth is allowed. Unlimited under "full",
// small under "bench"; 0 means unlimited.
uint32_t shared_pool_effective_total_slots(void);

// Per-class logical cap (suppress new additions once active slots exceed it).
uint32_t shared_pool_effective_class_slots(int class_idx);

View File

@ -175,8 +175,12 @@ static void ace_observe_and_decide(int k) {
int ss_count = 0;
uint32_t total_live = 0;
for (int i = 0; i < SUPER_REG_SIZE; i++) {
SuperRegEntry* e = &g_super_reg[i];
SuperRegEntry* reg = super_reg_entries();
int reg_cap = super_reg_effective_size();
if (!reg || reg_cap <= 0) return;
for (int i = 0; i < reg_cap; i++) {
SuperRegEntry* e = &reg[i];
// Atomic read (thread-safe)
uintptr_t base = atomic_load_explicit(

View File

@ -284,6 +284,10 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
}
} while (0);
if (!from_cache) {
ss_stats_on_ss_alloc_class(size_class);
}
return ss;
}

122
core/box/ss_budget_box.c Normal file
View File

@ -0,0 +1,122 @@
// ss_budget_box.c - Superslab Budget Box
// Box Theory: Budget/limit guard for Superslab growth.
// - ENV:
// HAKMEM_SS_BUDGET_GLOBAL : global cap (0 = unlimited, default varies)
// HAKMEM_SS_BUDGET_C0..C7 : per-class cap override (0 = unlimited)
// HAKMEM_SS_BUDGET_C7 : shorthand most often used
// - Profile hint:
// HAKMEM_TINY_PROFILE=larson_guard → stricter defaults.
#include "ss_budget_box.h"
#include <stdatomic.h>
#include <stdlib.h>
#include <strings.h>
#include <stdio.h>
#include "ss_stats_box.h"
static _Atomic int g_budget_init = 0;
static int g_ss_budget_global = 0;
static int g_ss_budget_per_class[8] = {0};
// Read a non-negative integer from environment variable `name`.
// Returns `fallback` when the variable is unset or empty; negative values
// are clamped to 0 (= unlimited).
static int ss_budget_parse_env(const char* name, int fallback) {
    const char* raw = getenv(name);
    if (raw == NULL || raw[0] == '\0') {
        return fallback;
    }
    int value = atoi(raw);
    return (value < 0) ? 0 : value;
}
// One-time lazy initialization of budget caps from the environment.
// Reads HAKMEM_SS_BUDGET_GLOBAL plus per-class HAKMEM_SS_BUDGET_C0..C7
// overrides; the larson_guard profile tightens the defaults.
// Fix: the original parsed HAKMEM_SS_BUDGET_C7 a second time after the
// per-class loop — the loop already handles i == 7, so the extra parse
// was redundant and is removed (behavior unchanged).
// NOTE(review): two threads racing here both run the init; writes are
// idempotent, so the race is benign — confirm that stays true.
static void ss_budget_init_once(void) {
    if (atomic_load_explicit(&g_budget_init, memory_order_acquire)) {
        return;
    }
    // Profile hint: larson_guard uses tighter defaults to cap RSS.
    const char* profile = getenv("HAKMEM_TINY_PROFILE");
    int is_larson_guard = (profile && strcasecmp(profile, "larson_guard") == 0);
    // Defaults: unlimited (0) unless larson_guard.
    int default_global = is_larson_guard ? 512 : 0;
    g_ss_budget_global = ss_budget_parse_env("HAKMEM_SS_BUDGET_GLOBAL", default_global);
    for (int i = 0; i < 8; i++) {
        int def = 0;
        if (is_larson_guard) {
            // Larson guard: modest per-class caps, C7 a bit looser.
            def = (i == 7) ? 192 : 96;
        }
        // Per-class override: HAKMEM_SS_BUDGET_C{i}. This naturally covers
        // the commonly used HAKMEM_SS_BUDGET_C7 shorthand as well.
        char buf[32];
        snprintf(buf, sizeof(buf), "HAKMEM_SS_BUDGET_C%d", i);
        g_ss_budget_per_class[i] = ss_budget_parse_env(buf, def);
    }
    atomic_store_explicit(&g_budget_init, 1, memory_order_release);
}
// Sum of live Superslab counts across all 8 Tiny classes (relaxed reads:
// the result is a best-effort snapshot, not an exact gauge).
static inline uint64_t ss_budget_global_live_sum(void) {
    uint64_t sum = 0;
    for (int i = 0; i < 8; i++) {
        sum += atomic_load_explicit(&g_ss_live_by_class[i], memory_order_relaxed);
    }
    return sum;
}
// Gate for allocating a new Superslab of Tiny class `class_idx`.
// Returns true when allocation may proceed, false when a per-class or the
// global budget cap is exhausted (cap value 0 = unlimited). Classes outside
// 0..7 are never gated here (not Tiny). The first few denials of each kind
// are logged to stderr for diagnosis.
bool ss_budget_on_alloc(int class_idx) {
    ss_budget_init_once();
    if (class_idx < 0 || class_idx >= 8) {
        return true; // outside Tiny; do not gate here
    }
    // Per-class cap check (relaxed read; best-effort count).
    uint64_t live_cls = atomic_load_explicit(&g_ss_live_by_class[class_idx],
                                             memory_order_relaxed);
    int class_cap = g_ss_budget_per_class[class_idx];
    if (class_cap > 0 && live_cls >= (uint64_t)class_cap) {
        // Log only the first 4 per-class denials process-wide.
        static _Atomic uint32_t log_once = 0;
        if (atomic_fetch_add_explicit(&log_once, 1, memory_order_relaxed) < 4) {
            fprintf(stderr,
                    "[SS_BUDGET_DENY] class=%d live=%llu cap=%d\n",
                    class_idx,
                    (unsigned long long)live_cls,
                    class_cap);
        }
        return false;
    }
    // Global cap check: sums live counts across all classes.
    int global_cap = g_ss_budget_global;
    if (global_cap > 0) {
        uint64_t live_total = ss_budget_global_live_sum();
        if (live_total >= (uint64_t)global_cap) {
            // Log only the first 4 global denials process-wide.
            static _Atomic uint32_t g_log_once = 0;
            if (atomic_fetch_add_explicit(&g_log_once, 1, memory_order_relaxed) < 4) {
                fprintf(stderr,
                        "[SS_BUDGET_DENY_GLOBAL] live_total=%llu cap=%d class=%d\n",
                        (unsigned long long)live_total,
                        global_cap,
                        class_idx);
            }
            return false;
        }
    }
    return true;
}
// Free-side hook, currently a no-op beyond ensuring init: live counters are
// maintained by ss_stats_on_ss_free_class(). Kept for symmetry and future
// accounting.
void ss_budget_on_free(int class_idx) {
    (void)class_idx;
    ss_budget_init_once();
    // We currently rely on ss_stats_on_ss_free_class() to update live counters.
}

19
core/box/ss_budget_box.h Normal file
View File

@ -0,0 +1,19 @@
// ss_budget_box.h - Superslab Budget Box
// Box Theory: centralize budget/limit checks for Superslab allocations.
// Responsibilities:
// - Read budget ENV once (global + per-class override)
// - Provide cheap checks before allocating new Superslabs
// - Allow symmetric free hook for future accounting
#ifndef HAKMEM_SS_BUDGET_BOX_H
#define HAKMEM_SS_BUDGET_BOX_H
#include <stdbool.h>
// Return false when allocation should be denied due to budget exhaustion.
bool ss_budget_on_alloc(int class_idx);
// Hook for future bookkeeping; currently a no-op placeholder.
void ss_budget_on_free(int class_idx);
#endif // HAKMEM_SS_BUDGET_BOX_H

View File

@ -13,12 +13,15 @@ static inline void ss_slab_reset_meta_for_tiny(SuperSlab* ss,
if (!ss) return;
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) return;
// class_idx < 0 means "unassigned" (255). Otherwise keep the requested class.
uint8_t target_class = (class_idx < 0) ? 255u : (uint8_t)class_idx;
TinySlabMeta* meta = &ss->slabs[slab_idx];
meta->used = 0;
meta->carved = 0;
meta->freelist = NULL;
meta->class_idx = (uint8_t)class_idx;
ss->class_map[slab_idx] = (uint8_t)class_idx;
meta->class_idx = target_class;
ss->class_map[slab_idx] = target_class;
// Reset remote queue state to avoid stale pending frees on reuse.
atomic_store_explicit(&ss->remote_heads[slab_idx], 0, memory_order_relaxed);

View File

@ -1,8 +1,10 @@
// ss_stats_box.c - SuperSlab Statistics Box Implementation
#include "ss_stats_box.h"
#include <stdbool.h>
#include "../superslab/superslab_inline.h"
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
// ============================================================================
// Global Statistics State
@ -30,6 +32,11 @@ _Atomic uint64_t g_free_ss_enter = 0; // hak_tiny_free_superslab() entr
_Atomic uint64_t g_free_local_box_calls = 0; // same-thread freelist pushes
_Atomic uint64_t g_free_remote_box_calls = 0; // cross-thread remote pushes
// Superslab/slab observability (Tiny-only; relaxed updates)
_Atomic uint64_t g_ss_live_by_class[8] = {0};
_Atomic uint64_t g_ss_empty_events[8] = {0};
_Atomic uint64_t g_slab_live_events[8] = {0};
// ============================================================================
// Statistics Update Implementation
// ============================================================================
@ -56,6 +63,36 @@ void ss_stats_cache_store(void) {
pthread_mutex_unlock(&g_superslab_lock);
}
// Count one newly allocated Superslab for Tiny class `class_idx`.
// Out-of-range classes are ignored.
void ss_stats_on_ss_alloc_class(int class_idx) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }
    atomic_fetch_add_explicit(&g_ss_live_by_class[class_idx], 1,
                              memory_order_relaxed);
}
void ss_stats_on_ss_free_class(int class_idx) {
if (class_idx >= 0 && class_idx < 8) {
// Saturating-style decrement to avoid underflow from mismatched hooks
uint64_t prev = atomic_load_explicit(&g_ss_live_by_class[class_idx], memory_order_relaxed);
if (prev > 0) {
atomic_fetch_sub_explicit(&g_ss_live_by_class[class_idx], 1, memory_order_relaxed);
}
}
}
// Record scan observations for one Superslab of Tiny class `class_idx`:
// adds `slab_live` to the live-slab event counter when positive, and bumps
// the empty-Superslab counter when `is_empty` is nonzero. Out-of-range
// classes are ignored. All updates are relaxed (event-style counters).
void ss_stats_on_ss_scan(int class_idx, int slab_live, int is_empty) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }
    if (slab_live > 0) {
        atomic_fetch_add_explicit(&g_slab_live_events[class_idx],
                                  (uint64_t)slab_live,
                                  memory_order_relaxed);
    }
    if (is_empty) {
        atomic_fetch_add_explicit(&g_ss_empty_events[class_idx], 1, memory_order_relaxed);
    }
}
// ============================================================================
// Statistics Reporting Implementation
// ============================================================================
@ -92,3 +129,23 @@ void superslab_print_global_stats(void) {
printf("Total bytes allocated: %lu MB\n", g_bytes_allocated / (1024 * 1024));
pthread_mutex_unlock(&g_superslab_lock);
}
// Dump coarse per-class Superslab counters to stderr when the
// HAKMEM_SS_STATS_DUMP environment variable is set to a non-"0" value.
// Classes whose counters are all zero are skipped.
// NOTE(review): despite the "once" wording in the header comment, this
// prints on every call while the env var is set — confirm callers invoke
// it only once (e.g. at exit).
void ss_stats_dump_if_requested(void) {
    const char* env = getenv("HAKMEM_SS_STATS_DUMP");
    if (!env || !*env || *env == '0') {
        return;
    }
    fprintf(stderr, "[SS_STATS] class live empty_events slab_live_events\n");
    for (int c = 0; c < 8; c++) {
        uint64_t live = atomic_load_explicit(&g_ss_live_by_class[c], memory_order_relaxed);
        uint64_t empty = atomic_load_explicit(&g_ss_empty_events[c], memory_order_relaxed);
        uint64_t slab_live = atomic_load_explicit(&g_slab_live_events[c], memory_order_relaxed);
        if (live || empty || slab_live) {
            fprintf(stderr, "  C%d: live=%llu empty=%llu slab_live=%llu\n",
                    c,
                    (unsigned long long)live,
                    (unsigned long long)empty,
                    (unsigned long long)slab_live);
        }
    }
}

View File

@ -43,6 +43,16 @@ extern _Atomic uint64_t g_free_ss_enter;
extern _Atomic uint64_t g_free_local_box_calls;
extern _Atomic uint64_t g_free_remote_box_calls;
// ============================================================================
// Superslab / Slab live-state observability (Tiny classes 0..7)
// ============================================================================
// NOTE: These are “event-style” counters updated at key transitions
// (alloc/free/reset) to keep overhead minimal. They are intended for
// regression detection and coarse budgeting rather than exact gauges.
extern _Atomic uint64_t g_ss_live_by_class[8]; // +1 on alloc, -1 on free (best-effort)
extern _Atomic uint64_t g_ss_empty_events[8]; // Observations of fully-empty Superslabs
extern _Atomic uint64_t g_slab_live_events[8]; // Observations of live slabs during scans
// ============================================================================
// Statistics Update API
// ============================================================================
@ -59,6 +69,11 @@ void ss_stats_cache_reuse(void);
// Thread-safe: mutex protected
void ss_stats_cache_store(void);
// Event-style observability helpers (Tiny classes only, relaxed atomics)
void ss_stats_on_ss_alloc_class(int class_idx);
void ss_stats_on_ss_free_class(int class_idx);
void ss_stats_on_ss_scan(int class_idx, int slab_live, int is_empty);
// ============================================================================
// Statistics Reporting API
// ============================================================================
@ -69,4 +84,7 @@ void superslab_print_stats(SuperSlab* ss);
// Print global SuperSlab statistics
void superslab_print_global_stats(void);
// ENV: HAKMEM_SS_STATS_DUMP=1 → dump coarse Superslab/slab counters once
void ss_stats_dump_if_requested(void);
#endif // HAKMEM_SS_STATS_BOX_H

View File

@ -119,7 +119,7 @@ static inline int ss_tls_bind_one(int class_idx,
tls->slab_base = tiny_slab_base_for(ss, slab_idx);
// Notify Tiny Page Box (if enabled for this class)
tiny_page_box_on_new_slab(tls);
tiny_page_box_on_new_slab(class_idx, tls);
// Sanity check: TLS must now describe this slab for this class.
// On failure, revert TLS to safe state and return 0.

143
core/box/super_reg_box.c Normal file
View File

@ -0,0 +1,143 @@
#include "super_reg_box.h"
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hakmem_super_registry.h"
// プロファイル別の実容量・論理上限
static _Atomic int g_super_reg_effective_size = SUPER_REG_SIZE;
static _Atomic int g_super_reg_effective_mask = SUPER_REG_MASK;
static _Atomic int g_super_reg_effective_per_class = SUPER_REG_PER_CLASS;
static _Atomic int g_super_reg_profile_inited = 0;
// 動的に確保する実配列
static SuperRegEntry* g_super_reg_entries = NULL;
static SuperSlab** g_super_reg_by_class_slots = NULL;
static int g_super_reg_by_class_stride = SUPER_REG_PER_CLASS;
static _Atomic int g_super_reg_allocated = 0;
// Round `requested` down to a power of two within (0, SUPER_REG_SIZE].
// Out-of-range requests yield `fallback` unchanged; in-range results are
// floored at 1024 entries.
static inline int super_reg_clamp_power_of_two(int requested, int fallback) {
    if (requested <= 0 || requested > SUPER_REG_SIZE) {
        return fallback;
    }
    // Smear the highest set bit downward, then keep only that bit
    // (round down to a power of two).
    int v = requested;
    for (int shift = 1; shift <= 16; shift <<= 1) {
        v |= v >> shift;
    }
    v -= v >> 1;
    // Enforce a floor of 1024 effective entries.
    return (v < 1024) ? 1024 : v;
}
// Decide the effective registry capacity once, from the profile string (or
// HAKMEM_PROFILE when NULL). "bench" logically shrinks the table to 1/8 and
// the per-class stride to 1/16; the compile-time constants stay untouched.
static void super_reg_apply_profile(const char* profile) {
    if (g_super_reg_profile_inited) {
        return;
    }
    const char* env_profile = profile ? profile : getenv("HAKMEM_PROFILE");
    const int is_bench = (env_profile && strcmp(env_profile, "bench") == 0);
    int eff_size = SUPER_REG_SIZE;
    int eff_per_class = SUPER_REG_PER_CLASS;
    if (is_bench) {
        // Shrink only the logical usable range (arrays keep original sizes).
        eff_size = SUPER_REG_SIZE >> 3;           // limit to 1/8
        eff_per_class = SUPER_REG_PER_CLASS >> 4; // 1/16
    }
    eff_size = super_reg_clamp_power_of_two(eff_size, SUPER_REG_SIZE);
    eff_per_class = eff_per_class > 0 ? eff_per_class : SUPER_REG_PER_CLASS;
    atomic_store_explicit(&g_super_reg_effective_size, eff_size, memory_order_relaxed);
    atomic_store_explicit(&g_super_reg_effective_mask, eff_size - 1, memory_order_relaxed);
    atomic_store_explicit(&g_super_reg_effective_per_class,
                          eff_per_class,
                          memory_order_relaxed);
    atomic_store_explicit(&g_super_reg_profile_inited, 1, memory_order_release);
}
// Allocate the registry table and the per-class slot table sized by the
// effective profile. `box` is a placeholder handle; `profile` overrides
// HAKMEM_PROFILE when non-NULL. Aborts on allocation failure.
// NOTE(review): two threads racing past the g_super_reg_allocated check can
// both calloc and overwrite the pointers (leaking one set) — confirm
// first-time init is serialized by callers.
void super_reg_init(SuperRegBox* box, const char* profile) {
    (void)box;
    super_reg_apply_profile(profile);
    if (atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        return;
    }
    int eff_size = super_reg_effective_size();
    int per_class = super_reg_effective_per_class();
    // Allocate registry table
    size_t reg_bytes = (size_t)eff_size * sizeof(SuperRegEntry);
    g_super_reg_entries = (SuperRegEntry*)calloc(eff_size, sizeof(SuperRegEntry));
    if (!g_super_reg_entries) {
        fprintf(stderr, "[SUPER_REG] failed to allocate %zu bytes for registry\n", reg_bytes);
        abort();
    }
    // Allocate per-class table (contiguous 1D block, indexed class*stride+i)
    size_t per_class_bytes = (size_t)TINY_NUM_CLASSES * (size_t)per_class * sizeof(SuperSlab*);
    g_super_reg_by_class_slots = (SuperSlab**)calloc(TINY_NUM_CLASSES * (size_t)per_class,
                                                     sizeof(SuperSlab*));
    if (!g_super_reg_by_class_slots) {
        fprintf(stderr, "[SUPER_REG] failed to allocate %zu bytes for per-class registry\n",
                per_class_bytes);
        abort();
    }
    g_super_reg_by_class_stride = per_class;
    atomic_store_explicit(&g_super_reg_allocated, 1, memory_order_release);
}
// Effective registry slot count; lazily applies the profile on first use.
int super_reg_effective_size(void) {
    if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
        super_reg_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_super_reg_effective_size, memory_order_relaxed);
}

// Index mask (effective size - 1).
int super_reg_effective_mask(void) {
    if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
        super_reg_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_super_reg_effective_mask, memory_order_relaxed);
}

// Effective per-class slot count (stride of the per-class table).
int super_reg_effective_per_class(void) {
    if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
        super_reg_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_super_reg_effective_per_class, memory_order_relaxed);
}

// Accessors below lazily allocate the backing arrays on first use.
// NOTE(review): concurrent first callers can race into super_reg_init();
// confirm first touch is serialized.
SuperRegEntry* super_reg_entries(void) {
    if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        super_reg_init(NULL, NULL);
    }
    return g_super_reg_entries;
}

SuperSlab** super_reg_by_class_slots(void) {
    if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        super_reg_init(NULL, NULL);
    }
    return g_super_reg_by_class_slots;
}

int super_reg_by_class_stride(void) {
    if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        super_reg_init(NULL, NULL);
    }
    return g_super_reg_by_class_stride;
}

77
core/box/super_reg_box.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#ifndef TINY_NUM_CLASSES
#define TINY_NUM_CLASSES 8
#endif

// SuperRegBox (design notes / API stub)
// -------------------------------------
// Role:
// - Break the direct dependency on g_super_reg / g_super_reg_by_class so the
//   registry capacity can switch per profile (full/prod/bench/larson_guard).
// - Capacity decisions, allocation, and teardown stay inside the box; callers
//   only use the thin API below.
//
// Profile policy (proposal):
// - full/prod : keep the current SUPER_REG_SIZE (=1,048,576) and
//               SUPER_REG_PER_CLASS (=16,384)
// - bench     : shrink SUPER_REG_SIZE to ~1/16-1/8 (e.g. 65,536), per-class
//               to something like 1,024
// - guard     : bench-sized or smaller, preferring fail-fast (ENOMEM)
//
// Thread safety:
// - Reuses the existing lock/atomic scheme; an "initialized?" flag gates
//   lazy setup.
//
// Planned API (implementation to follow):
typedef struct SuperSlab SuperSlab;
typedef struct SuperRegBox SuperRegBox;
struct SuperRegEntry;

// Decide capacity from profile/ENV and allocate the internal arrays.
// When profile is NULL, HAKMEM_PROFILE (bench / full / ...) is consulted.
void super_reg_init(SuperRegBox* box, const char* profile);

// Currently effective slot count / mask / per-class stride.
int super_reg_effective_size(void);
int super_reg_effective_mask(void);
int super_reg_effective_per_class(void);

// Access to the registry storage (dynamically allocated inside the box).
struct SuperRegEntry* super_reg_entries(void);
SuperSlab** super_reg_by_class_slots(void);
int super_reg_by_class_stride(void);

// Bounds-checked read of the per-class table; returns NULL for any
// out-of-range class/index or when the table is unavailable.
static inline SuperSlab* super_reg_by_class_at(int class_idx, int idx) {
    SuperSlab** slots = super_reg_by_class_slots();
    int stride = super_reg_by_class_stride();
    if (!slots || stride <= 0 || class_idx < 0 || idx < 0 ||
        class_idx >= TINY_NUM_CLASSES || idx >= stride) {
        return NULL;
    }
    return slots[class_idx * stride + idx];
}

// Bounds-checked write into the per-class table; silently ignores
// out-of-range class/index.
static inline void super_reg_by_class_set(int class_idx, int idx, SuperSlab* ss) {
    SuperSlab** slots = super_reg_by_class_slots();
    int stride = super_reg_by_class_stride();
    if (!slots || stride <= 0 || class_idx < 0 || idx < 0 ||
        class_idx >= TINY_NUM_CLASSES || idx >= stride) {
        return;
    }
    slots[class_idx * stride + idx] = ss;
}

// Register/unregister a Superslab (wraps the existing
// hak_super_register/unregister equivalents inside the box).
bool super_reg_register(SuperRegBox* box, SuperSlab* ss, uint32_t class_idx);
void super_reg_unregister(SuperRegBox* box, SuperSlab* ss, uint32_t class_idx);

// Address lookup / per-class iteration (minimal thin API).
SuperSlab* super_reg_find_by_addr(SuperRegBox* box, void* ptr);
SuperSlab* super_reg_iter_for_class(SuperRegBox* box, uint32_t class_idx, void** cursor);

// Future memory-reduction ideas (comment only):
// - Allocate g_super_reg/g_super_reg_by_class per profile via malloc/mmap,
//   detaching them from BSS.
// - In the bench profile, shrink the fixed sizes drastically and fail fast
//   (ENOMEM) when exhausted.
// - In prod, keep current sizes but allow access only through the box boundary.
// Forward-declaration note: SuperSlab is already declared above.

View File

@ -0,0 +1,63 @@
// Experimental C7-only hot path. Enabled solely via HAKMEM_TINY_C7_HOT=1;
// when unset (or 0) callers fall back completely to the conventional path.
// This is an A/B switch that is not meant to be ON by default in production.
#pragma once

#include "../hakmem_build_flags.h"
#include "c7_hotpath_env_box.h"
#include "tiny_c7_uc_hit_box.h"
#include "tiny_c7_warm_spill_box.h"
#include "tiny_c7_stats_sample_box.h"
#include "tiny_front_hot_box.h"
#include "tiny_front_cold_box.h"
#include "front_gate_box.h"
#include "tls_sll_box.h"
#include "ptr_conversion_box.h"

// C7 alloc hot path.
// Order:
//   1) Peek TLS/SFC first (front_gate_try_pop)
//   2) Unified Cache hit-only path tiny_uc_pop_c7_hit_only()
//   3) Otherwise fall back to the normal cold refill (refill and statistics
//      are the cold side's responsibility)
static inline void* tiny_c7_alloc_hot(size_t size) {
    (void)size; // unused: callers guarantee class_idx == 7
    void* user = NULL;
    // 1) Direct SFC/TLS SLL pop (yields a user pointer)
    if (front_gate_try_pop(/*class_idx=*/7, &user)) {
        return user;
    }
    // 2) Unified Cache hit
    user = tiny_uc_pop_c7_hit_only();
    if (__builtin_expect(user != NULL, 1)) {
        return user;
    }
    // 3) Fall back to cold refill
    return tiny_cold_refill_and_alloc(7);
}

// C7 free hot path. Takes a BASE pointer; tries TLS first, then UC.
static inline int tiny_c7_free_hot(void* base) {
    // 1) Push directly onto the TLS SLL (pass BASE through unchanged)
    extern int g_tls_sll_enable;
    if (__builtin_expect(g_tls_sll_enable, 1)) {
        if (tls_sll_push(7, HAK_BASE_FROM_RAW(base), UINT32_MAX)) {
            return 1;
        }
    }
    // 2) Push into the Unified Cache (lightweight hit-only variant)
    if (tiny_uc_push_c7_hot(base)) {
        return 1;
    }
    // 3) Warm spill (hook for future use)
    if (tiny_c7_warm_spill_one(base)) {
        return 1;
    }
    // 4) Finally fall back to the cold free path
    return tiny_cold_drain_and_free(7, base);
}

View File

@ -0,0 +1,9 @@
// tiny_c7_stats_sample_box.h - lightweight sampling helper for C7 stats.
// Currently a simple 1-in-16 sampler; exists so the hot path does not need
// #if blocks around statistics code.
#pragma once

// Returns 1 on every 16th call per thread (calls 16, 32, 48, ...), else 0.
static inline int tiny_c7_stats_sample(void) {
    static __thread unsigned tick = 0;
    return ((++tick) & 0xFu) == 0u;
}

View File

@ -0,0 +1,58 @@
// tiny_c7_uc_hit_box.h - C7-only Unified Cache hit-only helpers.
// Contract: act only on cache hits. On a miss, return NULL/0 and perform
// neither refill nor statistics updates.
#pragma once

#include "../front/tiny_unified_cache.h"
#include "tiny_layout_box.h"

// Hit-only pop from the C7 Unified Cache. Returns a user pointer on hit
// (class header stamped when headers are enabled), NULL when the ring is
// empty or not yet initialized.
static inline void* tiny_uc_pop_c7_hit_only(void) {
    TinyUnifiedCache* cache = &g_unified_cache[7];
#if !HAKMEM_TINY_FRONT_PGO
    // Lazy init unless a PGO build guarantees the cache is ready.
    if (__builtin_expect(cache->slots == NULL, 0)) {
        unified_cache_init();
        if (cache->slots == NULL) {
            return NULL;
        }
    }
#endif
    if (__builtin_expect(cache->head == cache->tail, 0)) {
        return NULL; // empty ring
    }
    void* base = cache->slots[cache->head];
    cache->head = (cache->head + 1) & cache->mask;
#if HAKMEM_TINY_HEADER_CLASSIDX
    // Stamp the class header and convert BASE -> user pointer.
    tiny_region_id_write_header(base, 7);
    size_t user_offset = tiny_user_offset(7);
    return (void*)((char*)base + user_offset);
#else
    return base;
#endif
}

// Hit-only push of a BASE pointer into the C7 Unified Cache.
// Returns 1 on success, 0 when the ring is full or unavailable.
static inline int tiny_uc_push_c7_hot(void* base) {
    TinyUnifiedCache* cache = &g_unified_cache[7];
#if !HAKMEM_TINY_FRONT_PGO
    if (__builtin_expect(cache->slots == NULL, 0)) {
        unified_cache_init();
        if (cache->slots == NULL) {
            return 0;
        }
    }
#endif
    uint16_t next_tail = (cache->tail + 1) & cache->mask;
    if (__builtin_expect(next_tail == cache->head, 0)) {
        return 0; // full
    }
    cache->slots[cache->tail] = base;
    cache->tail = next_tail;
    return 1;
}

View File

@ -0,0 +1,9 @@
// tiny_c7_warm_spill_box.h - C7 Warm spill hook (placeholder)
// Purpose: allow swapping spill実装 without touchingホットパス。
#pragma once
// Currently a no-op; replace this when a real warm-spill implementation lands.
// Returns 0 ("not handled") so callers always continue to the fallback path.
static inline int tiny_c7_warm_spill_one(void* base) {
    (void)base;
    return 0;
}

View File

@ -6,17 +6,20 @@
#include <string.h>
#include <strings.h>
#include "tiny_policy_learner_box.h"
#include "tiny_mem_stats_box.h"
TinyClassPolicy g_tiny_class_policy[TINY_NUM_CLASSES];
static _Atomic int g_tiny_class_policy_init_done = 0;
static _Atomic int g_tiny_class_policy_logged = 0;
static _Atomic int g_tiny_class_policy_profile_auto = 0;
static _Atomic int g_tiny_class_policy_mem_recorded = 0;
// Build the default per-class policy entry: every feature knob disabled and
// warm cap 0. Zero-init covers all fields; the explicit stores document intent.
static inline TinyClassPolicy tiny_class_policy_default_entry(void) {
    TinyClassPolicy entry = {0};
    entry.warm_enabled = 0;
    entry.warm_cap = 0;
    entry.page_box_enabled = 0;
    entry.tls_carve_enabled = 0;
    return entry;
}
@ -30,6 +33,7 @@ static void tiny_class_policy_set_legacy(void) {
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
g_tiny_class_policy[i].warm_enabled = 1;
g_tiny_class_policy[i].warm_cap = (i < 5) ? 4 : 8;
g_tiny_class_policy[i].tls_carve_enabled = (i >= 5) ? 1 : 0;
}
for (int i = 5; i < TINY_NUM_CLASSES; i++) {
g_tiny_class_policy[i].page_box_enabled = 1;
@ -45,6 +49,7 @@ static void tiny_class_policy_set_c5_7_only(void) {
g_tiny_class_policy[i].page_box_enabled = 1;
g_tiny_class_policy[i].warm_enabled = 1;
g_tiny_class_policy[i].warm_cap = 8;
g_tiny_class_policy[i].tls_carve_enabled = 1;
}
}
@ -53,6 +58,18 @@ static void tiny_class_policy_set_tinyplus_all(void) {
tiny_class_policy_set_legacy();
}
// "larson_guard" profile: legacy settings with warm caps tightened
// (small classes 2, larger classes 4) to reduce RSS under larson-style loads.
static void tiny_class_policy_set_larson_guard(void) {
    tiny_class_policy_set_legacy();
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        g_tiny_class_policy[cls].warm_cap = (cls < 5) ? 2 : 4;
    }
}
static void tiny_class_policy_set_auto(void) {
// auto プロファイルは legacy をベースにして、後段の learner に委譲
tiny_class_policy_set_legacy();
@ -72,6 +89,10 @@ static const char* tiny_class_policy_set_profile(const char* profile) {
tiny_class_policy_set_tinyplus_all();
atomic_store_explicit(&g_tiny_class_policy_profile_auto, 0, memory_order_release);
return "tinyplus_all";
} else if (strcasecmp(profile, "larson_guard") == 0) {
tiny_class_policy_set_larson_guard();
atomic_store_explicit(&g_tiny_class_policy_profile_auto, 0, memory_order_release);
return "larson_guard";
} else if (strcasecmp(profile, "auto") == 0) {
tiny_class_policy_set_auto();
return "auto";
@ -84,16 +105,20 @@ static const char* tiny_class_policy_set_profile(const char* profile) {
}
void tiny_class_policy_dump(const char* tag) {
if (!tiny_policy_log_enabled()) {
return;
}
const char* header = tag ? tag : "[POLICY_DUMP]";
fprintf(stderr, "%s\n", header);
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
TinyClassPolicy* p = &g_tiny_class_policy[cls];
fprintf(stderr,
" C%d: page=%u warm=%u cap=%u\n",
" C%d: page=%u warm=%u cap=%u tls_carve=%u\n",
cls,
p->page_box_enabled,
p->warm_enabled,
p->warm_cap);
p->warm_cap,
p->tls_carve_enabled);
}
}
@ -105,8 +130,13 @@ void tiny_class_policy_init_once(void) {
const char* profile = getenv("HAKMEM_TINY_POLICY_PROFILE");
const char* active_profile = tiny_class_policy_set_profile(profile);
if (atomic_exchange_explicit(&g_tiny_class_policy_mem_recorded, 1, memory_order_acq_rel) == 0) {
tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_policy));
}
// 1-shot ダンプでポリシーの内容を可視化(デバッグ用)
if (atomic_exchange_explicit(&g_tiny_class_policy_logged, 1, memory_order_acq_rel) == 0) {
if (tiny_policy_log_enabled() &&
atomic_exchange_explicit(&g_tiny_class_policy_logged, 1, memory_order_acq_rel) == 0) {
fprintf(stderr, "[POLICY_INIT] profile=%s\n", active_profile);
tiny_class_policy_dump(NULL);
}
@ -121,3 +151,8 @@ void tiny_class_policy_refresh_auto(void) {
}
tiny_policy_learner_tick();
}
// Report whether the active policy profile is "auto" (learner-managed).
// Runs init_once first so the ENV-selected profile has taken effect; the
// acquire load pairs with the release stores done during profile selection.
int tiny_class_policy_is_auto(void) {
    tiny_class_policy_init_once();
    return atomic_load_explicit(&g_tiny_class_policy_profile_auto, memory_order_acquire);
}

View File

@ -15,23 +15,37 @@
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
typedef struct TinyClassPolicy {
uint8_t page_box_enabled; // Enable Tiny Page Box for this class
uint8_t warm_enabled; // Enable Warm Pool for this class
uint8_t warm_cap; // Max warm SuperSlabs to keep (per-thread)
uint8_t reserved;
uint8_t tls_carve_enabled; // Enable Warm→TLS carve experiment for this class
} TinyClassPolicy;
extern TinyClassPolicy g_tiny_class_policy[TINY_NUM_CLASSES];
// ENV-gated policy logging. Default is OFF: logging is enabled only when
// HAKMEM_TINY_POLICY_LOG is set, non-empty, and does not start with '0'.
// The result is cached in a local static, so the ENV is read at most once.
static inline int tiny_policy_log_enabled(void) {
    static int g_policy_log = -1;
    if (__builtin_expect(g_policy_log == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_POLICY_LOG");
        g_policy_log = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_policy_log;
}
// Initialize policy table once (idempotent).
void tiny_class_policy_init_once(void);
// Refresh auto profile based on learner output (no-op for non-auto profiles)
void tiny_class_policy_refresh_auto(void);
// True when active profile is "auto" (learner-managed)
int tiny_class_policy_is_auto(void);
// Debug helper: dump current policy (tag optional)
void tiny_class_policy_dump(const char* tag);

View File

@ -1,6 +1,7 @@
// tiny_class_stats_box.c - Thread-local stats storage for Tiny classes
#include "tiny_class_stats_box.h"
#include "tiny_mem_stats_box.h"
#include <stdio.h>
#include <string.h>
@ -8,6 +9,20 @@ __thread TinyClassStatsThread g_tiny_class_stats = {0};
_Atomic uint64_t g_tiny_class_stats_uc_miss_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_warm_hit_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_shared_lock_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_tls_carve_attempt_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_tls_carve_success_global[TINY_NUM_CLASSES] = {0};
static _Atomic int g_tiny_class_stats_mem_recorded = 0;
// Account the stats tables' memory footprint exactly once per process.
// atomic_exchange acts as a one-shot gate: only the first caller proceeds.
static void tiny_class_stats_record_mem_once(void) {
    if (atomic_exchange_explicit(&g_tiny_class_stats_mem_recorded, 1,
                                 memory_order_acq_rel) != 0) {
        return; // already recorded by an earlier caller
    }
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_uc_miss_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_warm_hit_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_shared_lock_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_tls_carve_attempt_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_tls_carve_success_global));
}
void tiny_class_stats_reset_thread(void) {
memset(&g_tiny_class_stats, 0, sizeof(g_tiny_class_stats));
@ -15,11 +30,13 @@ void tiny_class_stats_reset_thread(void) {
// Copy this thread's counters into *out. A NULL destination is a no-op.
// Also records the stats tables' memory footprint on first use.
void tiny_class_stats_snapshot_thread(TinyClassStatsThread* out) {
    if (out == NULL) {
        return;
    }
    tiny_class_stats_record_mem_once();
    memcpy(out, &g_tiny_class_stats, sizeof(*out));
}
void tiny_class_stats_snapshot_global(TinyClassStatsThread* out) {
if (!out) return;
tiny_class_stats_record_mem_once();
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
out->uc_miss[i] = atomic_load_explicit(&g_tiny_class_stats_uc_miss_global[i],
memory_order_relaxed);
@ -27,6 +44,10 @@ void tiny_class_stats_snapshot_global(TinyClassStatsThread* out) {
memory_order_relaxed);
out->shared_lock[i] = atomic_load_explicit(&g_tiny_class_stats_shared_lock_global[i],
memory_order_relaxed);
out->tls_carve_attempt[i] = atomic_load_explicit(
&g_tiny_class_stats_tls_carve_attempt_global[i], memory_order_relaxed);
out->tls_carve_success[i] = atomic_load_explicit(
&g_tiny_class_stats_tls_carve_success_global[i], memory_order_relaxed);
}
}
@ -34,14 +55,18 @@ static void tiny_class_stats_dump_common(FILE* out,
const char* tag,
const TinyClassStatsThread* stats) {
if (!(out && stats)) return;
fprintf(out, "%s class uc_miss warm_hit shared_lock\n", tag ? tag : "[STATS]");
fprintf(out, "%s class uc_miss warm_hit shared_lock tls_carve_attempt tls_carve_success\n",
tag ? tag : "[STATS]");
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
if (stats->uc_miss[c] || stats->warm_hit[c] || stats->shared_lock[c]) {
fprintf(out, " C%d: %llu %llu %llu\n",
if (stats->uc_miss[c] || stats->warm_hit[c] || stats->shared_lock[c] ||
stats->tls_carve_attempt[c] || stats->tls_carve_success[c]) {
fprintf(out, " C%d: %llu %llu %llu %llu %llu\n",
c,
(unsigned long long)stats->uc_miss[c],
(unsigned long long)stats->warm_hit[c],
(unsigned long long)stats->shared_lock[c]);
(unsigned long long)stats->shared_lock[c],
(unsigned long long)stats->tls_carve_attempt[c],
(unsigned long long)stats->tls_carve_success[c]);
}
}
}

View File

@ -16,6 +16,8 @@ typedef struct TinyClassStatsThread {
uint64_t uc_miss[TINY_NUM_CLASSES]; // unified_cache_refill() hits
uint64_t warm_hit[TINY_NUM_CLASSES]; // warm pool successes
uint64_t shared_lock[TINY_NUM_CLASSES]; // shared pool lock acquisitions (hook as needed)
uint64_t tls_carve_attempt[TINY_NUM_CLASSES]; // Warm/TLS carve attempts
uint64_t tls_carve_success[TINY_NUM_CLASSES]; // Warm/TLS carve successes
} TinyClassStatsThread;
extern __thread TinyClassStatsThread g_tiny_class_stats;
@ -24,6 +26,8 @@ extern __thread TinyClassStatsThread g_tiny_class_stats;
extern _Atomic uint64_t g_tiny_class_stats_uc_miss_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_warm_hit_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_shared_lock_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_tls_carve_attempt_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_tls_carve_success_global[TINY_NUM_CLASSES];
static inline void tiny_class_stats_on_uc_miss(int ci) {
if (ci >= 0 && ci < TINY_NUM_CLASSES) {
@ -49,6 +53,22 @@ static inline void tiny_class_stats_on_shared_lock(int ci) {
}
}
static inline void tiny_class_stats_on_tls_carve_attempt(int ci) {
if (ci >= 0 && ci < TINY_NUM_CLASSES) {
g_tiny_class_stats.tls_carve_attempt[ci]++;
atomic_fetch_add_explicit(&g_tiny_class_stats_tls_carve_attempt_global[ci],
1, memory_order_relaxed);
}
}
static inline void tiny_class_stats_on_tls_carve_success(int ci) {
if (ci >= 0 && ci < TINY_NUM_CLASSES) {
g_tiny_class_stats.tls_carve_success[ci]++;
atomic_fetch_add_explicit(&g_tiny_class_stats_tls_carve_success_global[ci],
1, memory_order_relaxed);
}
}
// Optional: reset per-thread counters (cold path only).
void tiny_class_stats_reset_thread(void);

View File

@ -0,0 +1,65 @@
// tiny_mem_stats_box.c - Memory accounting helpers for Tiny front components
#include "tiny_mem_stats_box.h"
#include <stdatomic.h>
#include <sys/types.h>
#include <stdio.h>
_Atomic long long g_tiny_mem_unified_cache_bytes = 0;
_Atomic long long g_tiny_mem_warm_pool_bytes = 0;
_Atomic long long g_tiny_mem_page_box_bytes = 0;
_Atomic long long g_tiny_mem_tls_magazine_bytes = 0;
_Atomic long long g_tiny_mem_policy_stats_bytes = 0;
static inline void tiny_mem_stats_add(_Atomic long long* target, ssize_t bytes) {
if (!target || bytes == 0) {
return;
}
atomic_fetch_add_explicit(target, (long long)bytes, memory_order_relaxed);
}
// Adjust the Unified Cache byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_unified(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_unified_cache_bytes, bytes);
}
// Adjust the Warm Pool byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_warm(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_warm_pool_bytes, bytes);
}
// Adjust the Page Box byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_pagebox(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_page_box_bytes, bytes);
}
// Adjust the TLS magazine byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_tls_magazine(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_tls_magazine_bytes, bytes);
}
// Adjust the policy/stats tables byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_policy_stats(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_policy_stats_bytes, bytes);
}
// Emit a one-line summary of all tracked categories to stderr, in KB.
// Loads are relaxed snapshots; the totals may be slightly stale under
// concurrent updates, which is acceptable for a diagnostic dump.
void tiny_mem_stats_dump(void) {
    const long long uc = atomic_load_explicit(&g_tiny_mem_unified_cache_bytes,
                                              memory_order_relaxed);
    const long long wp = atomic_load_explicit(&g_tiny_mem_warm_pool_bytes,
                                              memory_order_relaxed);
    const long long pb = atomic_load_explicit(&g_tiny_mem_page_box_bytes,
                                              memory_order_relaxed);
    const long long tm = atomic_load_explicit(&g_tiny_mem_tls_magazine_bytes,
                                              memory_order_relaxed);
    const long long ps = atomic_load_explicit(&g_tiny_mem_policy_stats_bytes,
                                              memory_order_relaxed);
    fprintf(stderr,
            "[TINY_MEM_STATS] unified_cache=%lldKB warm_pool=%lldKB page_box=%lldKB "
            "tls_mag=%lldKB policy_stats=%lldKB total=%lldKB\n",
            uc / 1024,
            wp / 1024,
            pb / 1024,
            tm / 1024,
            ps / 1024,
            (uc + wp + pb + tm + ps) / 1024);
}

View File

@ -0,0 +1,38 @@
// tiny_mem_stats_box.h - Lightweight memory accounting for Tiny front boxes
//
// Purpose:
// - Provide coarse-grained byte counters for major Tiny front allocations
// (Unified Cache buffers, Warm Pool TLS state, Page Box TLS state,
// TLS magazine/front caches, and policy/stats tables).
// - Keep overhead near-zero: helpers are simple fetch-adds, typically called
// at init time when the structures are allocated.
//
// Usage:
// - Call tiny_mem_stats_add_*() at allocation/free sites (positive/negative).
// - Call tiny_mem_stats_dump() when HAKMEM_TINY_MEM_DUMP is set to emit one
// summary line to stderr (values reported in KB).
#ifndef TINY_MEM_STATS_BOX_H
#define TINY_MEM_STATS_BOX_H
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
// Byte counters (signed to allow subtracting on free paths)
extern _Atomic long long g_tiny_mem_unified_cache_bytes;
extern _Atomic long long g_tiny_mem_warm_pool_bytes;
extern _Atomic long long g_tiny_mem_page_box_bytes;
extern _Atomic long long g_tiny_mem_tls_magazine_bytes;
extern _Atomic long long g_tiny_mem_policy_stats_bytes;
void tiny_mem_stats_add_unified(ssize_t bytes);
void tiny_mem_stats_add_warm(ssize_t bytes);
void tiny_mem_stats_add_pagebox(ssize_t bytes);
void tiny_mem_stats_add_tls_magazine(ssize_t bytes);
void tiny_mem_stats_add_policy_stats(ssize_t bytes);
// Dump one line summary (values in KB) if hooked by caller.
void tiny_mem_stats_dump(void);
#endif // TINY_MEM_STATS_BOX_H

View File

@ -1,6 +1,5 @@
#include "tiny_page_box.h"
// TLS state definitions for Tiny Page Box
__thread TinyPageBoxState g_tiny_page_box_state[TINY_NUM_CLASSES];
__thread TinyPageBoxContext g_tiny_page_box[TINY_NUM_CLASSES];
__thread int g_tiny_page_box_init_done = 0;

View File

@ -9,7 +9,7 @@
// - API is generic over class_idx (0-7), but enabled-classes are controlled
// by ENV so that we can start with C7 only and later extend to C5/C6.
// - When enabled for a class:
// tiny_page_box_refill(class_idx, out, max) will try to supply up to
// tiny_page_box_refill(class_idx, tls, out, max) will try to supply up to
// `max` BASE pointers using per-page freelist before falling back.
// - When disabled for a class: the box returns 0 and caller uses legacy path.
//
@ -37,6 +37,7 @@
#include "../superslab/superslab_types.h" // For TinySlabMeta, SuperSlab
#include "../box/tiny_next_ptr_box.h" // For tiny_next_read()
#include "../hakmem_tiny_superslab.h" // For tiny_stride_for_class(), base helpers, superslab_ref_inc/dec
#include "../box/tiny_mem_stats_box.h" // For coarse memory accounting
// Superslab active counterRelease Guard Box と整合性を取るためのカウンタ更新)
extern void ss_active_add(SuperSlab* ss, uint32_t n);
@ -61,19 +62,28 @@ typedef struct TinyPageDesc {
// - enabled: このクラスで Page Box を使うかどうか
// - num_pages: 現在保持しているページ数0〜TINY_PAGE_BOX_MAX_PAGES
// - pages[]: TLS が掴んだ C7/C5/C6 ページの ring小さなバッファ
typedef struct TinyPageBoxState {
typedef struct TinyPageBoxContext {
uint8_t enabled; // 1=Page Box enabled for this class, 0=disabled
uint8_t num_pages; // 有効な pages[] エントリ数
uint8_t _pad[2];
TinyPageDesc pages[TINY_PAGE_BOX_MAX_PAGES];
} TinyPageBoxState;
} TinyPageBoxContext;
// TLS/state: one TinyPageBoxState per classper-thread Box
extern __thread TinyPageBoxState g_tiny_page_box_state[TINY_NUM_CLASSES];
// TLS/state: one TinyPageBoxContext per classper-thread Box
extern __thread TinyPageBoxContext g_tiny_page_box[TINY_NUM_CLASSES];
// One-shot init guardper-thread
extern __thread int g_tiny_page_box_init_done;
// ENV gate for Page Box logging (enabled when HAKMEM_TINY_PAGEBOX_LOG is set,
// non-empty, and does not start with '0'). The verdict is computed once and
// cached in a function-local static so the hot path never re-reads the ENV.
static inline int tiny_page_box_log_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached < 0, 0)) {
        const char* env = getenv("HAKMEM_TINY_PAGEBOX_LOG");
        int on = 0;
        if (env != NULL && env[0] != '\0' && env[0] != '0') {
            on = 1;
        }
        cached = on;
    }
    return cached;
}
// Helper: parse class list from ENV and set enabled flags.
// Default behaviour (ENV unset/empty) is to enable class 7 only.
static inline void tiny_page_box_init_once(void) {
@ -82,13 +92,14 @@ static inline void tiny_page_box_init_once(void) {
}
// Clear all state
memset(g_tiny_page_box_state, 0, sizeof(g_tiny_page_box_state));
memset(g_tiny_page_box, 0, sizeof(g_tiny_page_box));
tiny_mem_stats_add_pagebox((ssize_t)sizeof(g_tiny_page_box));
const char* env = getenv("HAKMEM_TINY_PAGE_BOX_CLASSES");
if (!env || !*env) {
// Default: enable mid-size classes (C5C7)
for (int c = 5; c <= 7 && c < TINY_NUM_CLASSES; c++) {
g_tiny_page_box_state[c].enabled = 1;
g_tiny_page_box[c].enabled = 1;
}
} else {
// Parse simple comma-separated list of integers: "5,6,7"
@ -107,7 +118,7 @@ static inline void tiny_page_box_init_once(void) {
p++;
}
if (val >= 0 && val < TINY_NUM_CLASSES) {
g_tiny_page_box_state[val].enabled = 1;
g_tiny_page_box[val].enabled = 1;
}
}
}
@ -123,7 +134,7 @@ static inline int tiny_page_box_is_enabled(int class_idx) {
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return 0;
}
return g_tiny_page_box_state[class_idx].enabled != 0;
return g_tiny_page_box[class_idx].enabled != 0;
}
// Forward declaration for TLS slab statetiny_tls.h から参照)
@ -133,7 +144,7 @@ extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
// ここで Page Box が利用可能なページとして登録しておくことで、
// 後続の unified_cache_refill() から Superslab/Warm Pool に落ちる前に
// 「既に TLS が掴んでいるページ」を優先的に使えるようにする。
static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
static inline void tiny_page_box_on_new_slab(int class_idx, TinyTLSSlab* tls)
{
if (!tls) {
return;
@ -143,6 +154,10 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
tiny_page_box_init_once();
}
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return;
}
SuperSlab* ss = tls->ss;
TinySlabMeta* meta = tls->meta;
uint8_t* base = tls->slab_base;
@ -152,12 +167,11 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
return;
}
int class_idx = (int)meta->class_idx;
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
if (meta->class_idx != (uint8_t)class_idx) {
return;
}
TinyPageBoxState* st = &g_tiny_page_box_state[class_idx];
TinyPageBoxContext* st = &g_tiny_page_box[class_idx];
if (!st->enabled) {
return;
}
@ -200,9 +214,11 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
superslab_ref_inc(ss);
#if !HAKMEM_BUILD_RELEASE
// Debug: Track Page Box stats per-class
fprintf(stderr, "[PAGE_BOX_REG] class=%d num_pages=%u capacity=%u carved=%u\n",
class_idx, st->num_pages, meta->capacity, meta->carved);
// Debug: Track Page Box stats per-classENV: HAKMEM_TINY_PAGEBOX_LOG=0 で抑制)
if (tiny_page_box_log_enabled()) {
fprintf(stderr, "[PAGE_BOX_REG] class=%d num_pages=%u capacity=%u carved=%u\n",
class_idx, st->num_pages, meta->capacity, meta->carved);
}
#endif
}
@ -219,9 +235,11 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
// - Superslab/Shared Pool 呼び出し頻度を徐々に観測・調整できる。
static inline int tiny_page_box_refill(int class_idx,
TinyTLSSlab* tls,
void** out,
int max_out)
{
(void)tls; // reserved for future per-TLS hints
if (!tiny_page_box_is_enabled(class_idx)) {
return 0;
}
@ -233,7 +251,7 @@ static inline int tiny_page_box_refill(int class_idx,
return 0;
}
TinyPageBoxState* st = &g_tiny_page_box_state[class_idx];
TinyPageBoxContext* st = &g_tiny_page_box[class_idx];
if (st->num_pages == 0) {
return 0;
}

View File

@ -4,39 +4,78 @@
#include "tiny_class_policy_box.h"
#include "tiny_class_stats_box.h"
#include <stdint.h>
#include <stdio.h>
// Simple OBSERVE/LEARN rule:
// - Choose top-2 classes by shared_pool_lock and enable Page Box for them.
// - Always keep existing warm_enabled / warm_cap (policy table is already seeded).
// Simple OBSERVE/LEARN rule (auto profile only):
// - C7 は常に ON (page + warm, cap=8)
// - それ以外のクラスから score = shared_lock*4 + uc_miss の上位2つだけ page/warm を ON
// - warm_cap は C5C7:8, それ以外:4
// - スコアが 0 なら何も変更しない
void tiny_policy_learner_tick(void) {
if (!tiny_class_policy_is_auto()) {
return;
}
TinyClassStatsThread snap = {0};
tiny_class_stats_snapshot_global(&snap);
// 事前に全クラスを OFF ベースに初期化cap はデフォルト値に)
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
TinyClassPolicy* p = &g_tiny_class_policy[c];
p->page_box_enabled = 0;
p->warm_enabled = 0;
p->warm_cap = (c >= 5) ? 8 : 4;
p->tls_carve_enabled = 0;
}
// C7 は常に ON
g_tiny_class_policy[7].page_box_enabled = 1;
g_tiny_class_policy[7].warm_enabled = 1;
g_tiny_class_policy[7].warm_cap = 8;
g_tiny_class_policy[7].tls_carve_enabled = 1;
// C7 を除く上位2クラスをスコアで選択
int top1 = -1, top2 = -1;
uint64_t v1 = 0, v2 = 0;
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
uint64_t v = snap.shared_lock[i];
if (v > v1) {
if (i == 7) continue;
uint64_t score = snap.shared_lock[i] * 4 + snap.uc_miss[i];
if (score > v1) {
top2 = top1;
v2 = v1;
top1 = i;
v1 = v;
} else if (v > v2) {
v1 = score;
} else if (score > v2) {
top2 = i;
v2 = v;
v2 = score;
}
}
// Nothing observed yet → leave policy untouched
// スコアが全く無い場合は C7 だけ維持
if (v1 == 0) {
return;
}
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
TinyClassPolicy* p = &g_tiny_class_policy[c];
if (c == top1 || c == top2) {
p->page_box_enabled = 1;
p->warm_enabled = 1;
if (top1 >= 0) {
TinyClassPolicy* p = &g_tiny_class_policy[top1];
p->page_box_enabled = 1;
p->warm_enabled = 1;
p->tls_carve_enabled = 1;
}
if (top2 >= 0 && v2 > 0) {
TinyClassPolicy* p = &g_tiny_class_policy[top2];
p->page_box_enabled = 1;
p->warm_enabled = 1;
p->tls_carve_enabled = 1;
}
// 1-shot ログ(最多 4 回まで)
static _Atomic uint32_t auto_logs = 0;
if (tiny_policy_log_enabled()) {
uint32_t n = atomic_fetch_add_explicit(&auto_logs, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[POLICY_AUTO_UPDATE] profile=auto (top=%d/%d)\n", top1, top2);
tiny_class_policy_dump(NULL);
}
}
}

View File

@ -7,6 +7,7 @@
#include "../tiny_debug_api.h" // tiny_refill_failfast_level(), tiny_failfast_abort_ptr()
#include "c7_meta_used_counter_box.h" // C7 meta->used telemetry (Release/Debug共通)
#include "tiny_next_ptr_box.h"
#include "tiny_class_stats_box.h"
#include "../superslab/superslab_inline.h"
#include <stdatomic.h>
#include <signal.h>
@ -41,6 +42,8 @@ tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
if (meta->class_idx != (uint8_t)class_idx) return res;
if (tls->slab_idx < 0 || tls->slab_idx >= ss_slabs_capacity(tls->ss)) return res;
tiny_class_stats_on_tls_carve_attempt(class_idx);
// Freelist pop
if (meta->freelist) {
#if !HAKMEM_BUILD_RELEASE
@ -61,6 +64,7 @@ tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
meta->used++;
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
ss_active_add(tls->ss, 1);
tiny_class_stats_on_tls_carve_success(class_idx);
res.block = block;
res.path = TINY_TLS_CARVE_PATH_FREELIST;
return res;
@ -93,6 +97,7 @@ tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
meta->used++;
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
ss_active_add(tls->ss, 1);
tiny_class_stats_on_tls_carve_success(class_idx);
res.block = block;
res.path = TINY_TLS_CARVE_PATH_LINEAR;
return res;

View File

@ -9,6 +9,7 @@
#include <stdint.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
#include "../hakmem_tiny_superslab.h"
#include "../tiny_tls.h"
@ -18,8 +19,18 @@
extern _Atomic uintptr_t g_c7_stage3_magic_ss;
// ENV gate for warm-prefill logging (enabled when HAKMEM_TINY_WARM_LOG is set,
// non-empty, and does not start with '0'). The answer is computed once and
// cached in a function-local static so repeated calls are branch + load only.
static inline int warm_prefill_log_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached < 0, 0)) {
        const char* env = getenv("HAKMEM_TINY_WARM_LOG");
        int on = 0;
        if (env != NULL && env[0] != '\0' && env[0] != '0') {
            on = 1;
        }
        cached = on;
    }
    return cached;
}
static inline void warm_prefill_log_c7_meta(const char* tag, TinyTLSSlab* tls) {
if (!tls || !tls->ss) return;
if (!warm_prefill_log_enabled()) return;
#if HAKMEM_BUILD_RELEASE
static _Atomic uint32_t rel_logs = 0;
uint32_t n = atomic_fetch_add_explicit(&rel_logs, 1, memory_order_relaxed);
@ -116,7 +127,7 @@ static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls, int warm
}
// C7 safety: prefer only pristine slabs (used=0 carved=0 freelist=NULL)
if (class_idx == 7) {
if (class_idx == 7 && warm_prefill_log_enabled()) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
if (meta->class_idx == 7 &&
(meta->used > 0 || meta->carved > 0 || meta->freelist != NULL)) {
@ -162,7 +173,7 @@ static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls, int warm
warm_pool_rel_c7_prefill_slab();
}
#else
if (class_idx == 7) {
if (class_idx == 7 && warm_prefill_log_enabled()) {
static __thread int dbg_c7_prefill_logs = 0;
if (dbg_c7_prefill_logs < 8) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];

View File

@ -23,31 +23,19 @@ extern __thread TinyWarmPoolStats g_warm_pool_stats[TINY_NUM_CLASSES];
// Record a warm pool hit.
// Called when warm_pool_pop() succeeds and carve produces blocks.
// The counter update is compiled in only under HAKMEM_DEBUG_COUNTERS;
// otherwise this reduces to a no-op so the hot path pays nothing.
static inline void warm_pool_record_hit(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_warm_pool_stats[class_idx].hits++;
#else
    (void)class_idx;
#endif
}
// Record a warm pool miss.
// Called when warm_pool_pop() returns NULL (pool empty).
// The counter update is compiled in only under HAKMEM_DEBUG_COUNTERS;
// otherwise this reduces to a no-op so the hot path pays nothing.
static inline void warm_pool_record_miss(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_warm_pool_stats[class_idx].misses++;
#else
    (void)class_idx;
#endif
}
// Record a warm pool prefill event.
// Called when the pool is empty and we do a secondary prefill.
// The counter update is compiled in only under HAKMEM_DEBUG_COUNTERS;
// otherwise this reduces to a no-op so the hot path pays nothing.
static inline void warm_pool_record_prefilled(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_warm_pool_stats[class_idx].prefilled++;
#else
    (void)class_idx;
#endif
}
#endif // HAK_WARM_POOL_STATS_BOX_H