Boxify superslab registry, add bench profile, and document C7 hotpath experiments

This commit is contained in:
Moe Charm (CI)
2025-12-07 03:12:27 +09:00
parent 18faa6a1c4
commit fda6cd2e67
71 changed files with 2052 additions and 286 deletions

View File

@ -0,0 +1,15 @@
// c7_hotpath_env_box.h - ENV gate for the C7 hotpath.
// Purpose: isolate environment-variable handling so hotpath code can assume
// the gate has already been evaluated.
#pragma once

#include <stdlib.h>

// ENV gate: set HAKMEM_TINY_C7_HOT=1 to enable (default: OFF).
// The decision is computed once on first call and cached for the
// lifetime of the process.
static inline int tiny_c7_hot_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached < 0, 0)) {
        const char* v = getenv("HAKMEM_TINY_C7_HOT");
        cached = (v != NULL && v[0] != '\0' && v[0] != '0') ? 1 : 0;
    }
    return cached;
}

View File

@ -0,0 +1,8 @@
// c7_meta_used_counter_box.c
// Definitions for the C7 meta->used increment counters (shared by Release
// and Debug builds). Declarations live in the matching header.
#include "c7_meta_used_counter_box.h"

// Total meta->used increments observed for class C7.
_Atomic uint64_t g_c7_meta_used_inc_total = 0;
// Breakdown by path, per the counter suffixes (backend / TLS / front).
_Atomic uint64_t g_c7_meta_used_inc_backend = 0;
_Atomic uint64_t g_c7_meta_used_inc_tls = 0;
_Atomic uint64_t g_c7_meta_used_inc_front = 0;

View File

@ -17,8 +17,9 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \
core/box/../tiny_region_id.h core/box/../tiny_box_geometry.h \
core/box/../ptr_track.h core/box/../hakmem_super_registry.h \
core/box/../box/ss_addr_map_box.h \
core/box/../box/../hakmem_build_flags.h core/box/../tiny_debug_api.h \
core/box/carve_push_box.h core/box/capacity_box.h core/box/tls_sll_box.h \
core/box/../box/../hakmem_build_flags.h core/box/../box/super_reg_box.h \
core/box/../tiny_debug_api.h core/box/carve_push_box.h \
core/box/capacity_box.h core/box/tls_sll_box.h \
core/box/../hakmem_internal.h core/box/../hakmem.h \
core/box/../hakmem_config.h core/box/../hakmem_features.h \
core/box/../hakmem_sys.h core/box/../hakmem_whale.h \
@ -70,6 +71,7 @@ core/box/../ptr_track.h:
core/box/../hakmem_super_registry.h:
core/box/../box/ss_addr_map_box.h:
core/box/../box/../hakmem_build_flags.h:
core/box/../box/super_reg_box.h:
core/box/../tiny_debug_api.h:
core/box/carve_push_box.h:
core/box/capacity_box.h:

View File

@ -11,20 +11,21 @@ core/box/front_gate_box.o: core/box/front_gate_box.c \
core/hakmem_tiny_superslab_constants.h core/superslab/superslab_inline.h \
core/superslab/superslab_types.h core/superslab/../tiny_box_geometry.h \
core/tiny_debug_ring.h core/tiny_remote.h core/box/ss_addr_map_box.h \
core/box/../hakmem_build_flags.h core/tiny_debug_api.h \
core/box/tiny_layout_box.h core/box/../hakmem_tiny_config.h \
core/box/tiny_header_box.h core/box/tiny_layout_box.h \
core/box/../tiny_region_id.h core/box/tls_sll_box.h \
core/box/../hakmem_internal.h core/box/../hakmem.h \
core/box/../hakmem_build_flags.h core/box/../hakmem_config.h \
core/box/../hakmem_features.h core/box/../hakmem_sys.h \
core/box/../hakmem_whale.h core/box/../box/ptr_type_box.h \
core/box/../hakmem_debug_master.h core/box/../tiny_remote.h \
core/box/../hakmem_tiny_integrity.h core/box/../hakmem_tiny.h \
core/box/../ptr_track.h core/box/../ptr_trace.h \
core/box/../hakmem_trace_master.h core/box/../hakmem_stats_master.h \
core/box/../tiny_debug_ring.h core/box/ss_addr_map_box.h \
core/box/../superslab/superslab_inline.h core/box/tiny_ptr_bridge_box.h \
core/box/../hakmem_build_flags.h core/box/super_reg_box.h \
core/tiny_debug_api.h core/box/tiny_layout_box.h \
core/box/../hakmem_tiny_config.h core/box/tiny_header_box.h \
core/box/tiny_layout_box.h core/box/../tiny_region_id.h \
core/box/tls_sll_box.h core/box/../hakmem_internal.h \
core/box/../hakmem.h core/box/../hakmem_build_flags.h \
core/box/../hakmem_config.h core/box/../hakmem_features.h \
core/box/../hakmem_sys.h core/box/../hakmem_whale.h \
core/box/../box/ptr_type_box.h core/box/../hakmem_debug_master.h \
core/box/../tiny_remote.h core/box/../hakmem_tiny_integrity.h \
core/box/../hakmem_tiny.h core/box/../ptr_track.h \
core/box/../ptr_trace.h core/box/../hakmem_trace_master.h \
core/box/../hakmem_stats_master.h core/box/../tiny_debug_ring.h \
core/box/ss_addr_map_box.h core/box/../superslab/superslab_inline.h \
core/box/tiny_ptr_bridge_box.h \
core/box/../hakmem_tiny_superslab_internal.h \
core/box/../hakmem_tiny_superslab.h core/box/../box/ss_hot_cold_box.h \
core/box/../box/../superslab/superslab_types.h \
@ -63,6 +64,7 @@ core/tiny_debug_ring.h:
core/tiny_remote.h:
core/box/ss_addr_map_box.h:
core/box/../hakmem_build_flags.h:
core/box/super_reg_box.h:
core/tiny_debug_api.h:
core/box/tiny_layout_box.h:
core/box/../hakmem_tiny_config.h:

View File

@ -11,8 +11,9 @@ core/box/front_gate_classifier.o: core/box/front_gate_classifier.c \
core/box/../superslab/../tiny_box_geometry.h \
core/box/../tiny_debug_ring.h core/box/../tiny_remote.h \
core/box/../box/ss_addr_map_box.h \
core/box/../box/../hakmem_build_flags.h core/box/../hakmem_tiny.h \
core/box/../hakmem_trace.h core/box/../hakmem_tiny_mini_mag.h \
core/box/../box/../hakmem_build_flags.h core/box/../box/super_reg_box.h \
core/box/../hakmem_tiny.h core/box/../hakmem_trace.h \
core/box/../hakmem_tiny_mini_mag.h \
core/box/../box/hak_lane_classify.inc.h core/box/../box/ptr_type_box.h \
core/box/../tiny_debug_api.h core/box/../hakmem_tiny_superslab.h \
core/box/../superslab/superslab_inline.h \
@ -38,6 +39,7 @@ core/box/../tiny_debug_ring.h:
core/box/../tiny_remote.h:
core/box/../box/ss_addr_map_box.h:
core/box/../box/../hakmem_build_flags.h:
core/box/../box/super_reg_box.h:
core/box/../hakmem_tiny.h:
core/box/../hakmem_trace.h:
core/box/../hakmem_tiny_mini_mag.h:

View File

@ -0,0 +1,88 @@
#include "remote_side_box.h"
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef REM_SIDE_LOG2
#define REM_SIDE_LOG2 20
#endif
static _Atomic uint32_t g_remote_log2 = REM_SIDE_LOG2;
static _Atomic uint32_t g_remote_size = (1u << REM_SIDE_LOG2);
static _Atomic uint32_t g_remote_mask = (1u << REM_SIDE_LOG2) - 1;
static _Atomic int g_remote_profile_inited = 0;
static rem_side_entry* g_remote_slots = NULL;
static _Atomic int g_remote_allocated = 0;
// Decide the effective REM_SIDE table geometry once, from the given profile
// string (or the HAKMEM_PROFILE environment variable when NULL).
// The "bench" profile logically shrinks the hash width to 1/8 of the
// compile-time default, but never below 2^12 (4096) entries.
static void remote_side_apply_profile(const char* profile) {
    if (g_remote_profile_inited) {
        return;
    }
    const char* name = (profile != NULL) ? profile : getenv("HAKMEM_PROFILE");
    uint32_t width = REM_SIDE_LOG2;
    if (name != NULL && strcmp(name, "bench") == 0 && REM_SIDE_LOG2 > 4) {
        width = REM_SIDE_LOG2 - 3; // bench: shrink hash width to 1/8
        if (width < 12) {
            width = 12; // keep at least 4096 entries
        }
    }
    uint32_t entries = (1u << width);
    atomic_store_explicit(&g_remote_log2, width, memory_order_relaxed);
    atomic_store_explicit(&g_remote_size, entries, memory_order_relaxed);
    atomic_store_explicit(&g_remote_mask, entries - 1u, memory_order_relaxed);
    atomic_store_explicit(&g_remote_profile_inited, 1, memory_order_release);
}
// Allocate the global REM_SIDE slot table according to the effective profile.
// `box` is currently unused (placeholder for a future instance handle);
// `profile` overrides HAKMEM_PROFILE when non-NULL.
// Aborts the process when the table cannot be allocated.
// NOTE(review): if two threads race here before g_remote_allocated is
// published, both may calloc a table — one allocation leaks and
// g_remote_slots can be overwritten. Confirm first-time init is serialized.
void remote_side_init(RemoteSideBox* box, const char* profile) {
    (void)box;
    remote_side_apply_profile(profile);
    if (atomic_load_explicit(&g_remote_allocated, memory_order_acquire)) {
        return;
    }
    uint32_t size = remote_side_effective_size();
    g_remote_slots = (rem_side_entry*)calloc(size, sizeof(rem_side_entry));
    if (!g_remote_slots) {
        fprintf(stderr, "[REMOTE_SIDE] failed to allocate %zu bytes\n",
                (size_t)size * sizeof(rem_side_entry));
        abort();
    }
    atomic_store_explicit(&g_remote_allocated, 1, memory_order_release);
}
// Effective log2 of the REM_SIDE table width. Lazily applies the profile on
// first use so callers need not call remote_side_init() explicitly.
uint32_t remote_side_effective_log2(void) {
    if (!atomic_load_explicit(&g_remote_profile_inited, memory_order_acquire)) {
        remote_side_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_remote_log2, memory_order_relaxed);
}

// Effective entry count (always a power of two; see apply_profile).
uint32_t remote_side_effective_size(void) {
    if (!atomic_load_explicit(&g_remote_profile_inited, memory_order_acquire)) {
        remote_side_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_remote_size, memory_order_relaxed);
}

// Index mask (size - 1) for hashing into the table.
uint32_t remote_side_effective_mask(void) {
    if (!atomic_load_explicit(&g_remote_profile_inited, memory_order_acquire)) {
        remote_side_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_remote_mask, memory_order_relaxed);
}

// Pointer to the slot array, allocating it on first use.
// NOTE(review): concurrent first callers can race into remote_side_init();
// confirm first touch is single-threaded or serialized.
rem_side_entry* remote_side_table(void) {
    if (!atomic_load_explicit(&g_remote_allocated, memory_order_acquire)) {
        remote_side_init(NULL, NULL);
    }
    return g_remote_slots;
}

View File

@ -0,0 +1,21 @@
#pragma once
// RemoteSideBox: thin wrapper that lets profiles logically shrink the
// REM_SIDE table used by tiny_remote.
#include <stdint.h>
#include <stdatomic.h>

// One hash-table slot: (node pointer, next pointer) pair.
typedef struct rem_side_entry {
    _Atomic(uintptr_t) key; // node pointer
    _Atomic(uintptr_t) val; // next pointer
} rem_side_entry;

typedef struct RemoteSideBox RemoteSideBox;

// When profile is NULL, the HAKMEM_PROFILE environment variable is read.
void remote_side_init(RemoteSideBox* box, const char* profile);

// Effective size/mask (the array itself stays at REM_SIDE_SIZE).
uint32_t remote_side_effective_size(void);
uint32_t remote_side_effective_mask(void);
uint32_t remote_side_effective_log2(void);

// Lazily-allocated table storage.
rem_side_entry* remote_side_table(void);

View File

@ -0,0 +1,50 @@
#include "shared_pool_box.h"
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
// 既存の g_shared_pool 配列上に「論理的な上限」だけを被せる。
static _Atomic uint32_t g_sp_total_limit = 0; // 0 = 無制限(現行のまま)
static _Atomic uint32_t g_sp_class_limit = 0; // 0 = 無制限
static _Atomic int g_sp_profile_inited = 0;
static void shared_pool_apply_profile(const char* profile) {
if (g_sp_profile_inited) {
return;
}
const char* env_profile = profile ? profile : getenv("HAKMEM_PROFILE");
int is_bench = (env_profile && strcmp(env_profile, "bench") == 0);
uint32_t total_limit = 0;
uint32_t class_limit = 0;
if (is_bench) {
// bench 用: ひとまず控えめな論理上限だけ入れる
total_limit = 65536; // 元の 1M よりかなり少ない
class_limit = 2048; // クラスあたりの active slot 上限の目安
}
atomic_store_explicit(&g_sp_total_limit, total_limit, memory_order_relaxed);
atomic_store_explicit(&g_sp_class_limit, class_limit, memory_order_relaxed);
atomic_store_explicit(&g_sp_profile_inited, 1, memory_order_release);
}
// Initialize the Shared Pool limit box. `box` is a placeholder handle;
// `profile` overrides HAKMEM_PROFILE when non-NULL.
void shared_pool_box_init(SharedPoolBox* box, const char* profile) {
    (void)box;
    shared_pool_apply_profile(profile);
}

// Logical cap on total Shared Pool slots (0 = unlimited). Lazily applies
// the profile on first use.
uint32_t shared_pool_effective_total_slots(void) {
    if (!atomic_load_explicit(&g_sp_profile_inited, memory_order_acquire)) {
        shared_pool_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_sp_class_limit == g_sp_class_limit ? &g_sp_total_limit : &g_sp_total_limit, memory_order_relaxed);
}

// Logical per-class cap (0 = unlimited). `class_idx` is currently ignored:
// one shared limit applies to every class.
uint32_t shared_pool_effective_class_slots(int class_idx) {
    (void)class_idx;
    if (!atomic_load_explicit(&g_sp_profile_inited, memory_order_acquire)) {
        shared_pool_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_sp_class_limit, memory_order_relaxed);
}

View File

@ -0,0 +1,18 @@
#pragma once
// SharedPoolBox: lightweight wrapper that overlays a "logical limit" on the
// existing g_shared_pool.
// Goals:
// - Logically curb Shared Pool growth under HAKMEM_PROFILE=bench and similar.
// - Keep the array size itself unchanged (BSS is not shrunk yet).
#include <stdint.h>

typedef struct SharedPoolBox SharedPoolBox;

// When profile is NULL, HAKMEM_PROFILE is read instead.
void shared_pool_box_init(SharedPoolBox* box, const char* profile);

// Total budget beyond which no growth is allowed. Unlimited under "full",
// small under "bench"; 0 means unlimited.
uint32_t shared_pool_effective_total_slots(void);

// Per-class logical cap (suppress new additions once active slots exceed it).
uint32_t shared_pool_effective_class_slots(int class_idx);

View File

@ -175,8 +175,12 @@ static void ace_observe_and_decide(int k) {
int ss_count = 0;
uint32_t total_live = 0;
for (int i = 0; i < SUPER_REG_SIZE; i++) {
SuperRegEntry* e = &g_super_reg[i];
SuperRegEntry* reg = super_reg_entries();
int reg_cap = super_reg_effective_size();
if (!reg || reg_cap <= 0) return;
for (int i = 0; i < reg_cap; i++) {
SuperRegEntry* e = &reg[i];
// Atomic read (thread-safe)
uintptr_t base = atomic_load_explicit(

View File

@ -284,6 +284,10 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
}
} while (0);
if (!from_cache) {
ss_stats_on_ss_alloc_class(size_class);
}
return ss;
}

122
core/box/ss_budget_box.c Normal file
View File

@ -0,0 +1,122 @@
// ss_budget_box.c - Superslab Budget Box
// Box Theory: Budget/limit guard for Superslab growth.
// - ENV:
// HAKMEM_SS_BUDGET_GLOBAL : global cap (0 = unlimited, default varies)
// HAKMEM_SS_BUDGET_C0..C7 : per-class cap override (0 = unlimited)
// HAKMEM_SS_BUDGET_C7 : shorthand most often used
// - Profile hint:
// HAKMEM_TINY_PROFILE=larson_guard → stricter defaults.
#include "ss_budget_box.h"
#include <stdatomic.h>
#include <stdlib.h>
#include <strings.h>
#include <stdio.h>
#include "ss_stats_box.h"
static _Atomic int g_budget_init = 0;
static int g_ss_budget_global = 0;
static int g_ss_budget_per_class[8] = {0};
// Read a non-negative integer from environment variable `name`.
// Returns `fallback` when the variable is unset or empty; negative values
// are clamped to 0 (= unlimited).
static int ss_budget_parse_env(const char* name, int fallback) {
    const char* raw = getenv(name);
    if (raw == NULL || raw[0] == '\0') {
        return fallback;
    }
    int value = atoi(raw);
    return (value < 0) ? 0 : value;
}
// One-time lazy initialization of budget caps from the environment.
// Reads HAKMEM_SS_BUDGET_GLOBAL plus per-class HAKMEM_SS_BUDGET_C0..C7
// overrides; the larson_guard profile tightens the defaults.
// Fix: the original parsed HAKMEM_SS_BUDGET_C7 a second time after the
// per-class loop — the loop already handles i == 7, so the extra parse
// was redundant and is removed (behavior unchanged).
// NOTE(review): two threads racing here both run the init; writes are
// idempotent, so the race is benign — confirm that stays true.
static void ss_budget_init_once(void) {
    if (atomic_load_explicit(&g_budget_init, memory_order_acquire)) {
        return;
    }
    // Profile hint: larson_guard uses tighter defaults to cap RSS.
    const char* profile = getenv("HAKMEM_TINY_PROFILE");
    int is_larson_guard = (profile && strcasecmp(profile, "larson_guard") == 0);
    // Defaults: unlimited (0) unless larson_guard.
    int default_global = is_larson_guard ? 512 : 0;
    g_ss_budget_global = ss_budget_parse_env("HAKMEM_SS_BUDGET_GLOBAL", default_global);
    for (int i = 0; i < 8; i++) {
        int def = 0;
        if (is_larson_guard) {
            // Larson guard: modest per-class caps, C7 a bit looser.
            def = (i == 7) ? 192 : 96;
        }
        // Per-class override: HAKMEM_SS_BUDGET_C{i}. This naturally covers
        // the commonly used HAKMEM_SS_BUDGET_C7 shorthand as well.
        char buf[32];
        snprintf(buf, sizeof(buf), "HAKMEM_SS_BUDGET_C%d", i);
        g_ss_budget_per_class[i] = ss_budget_parse_env(buf, def);
    }
    atomic_store_explicit(&g_budget_init, 1, memory_order_release);
}
// Sum of live Superslab counts across all 8 Tiny classes (relaxed reads:
// the result is a best-effort snapshot, not an exact gauge).
static inline uint64_t ss_budget_global_live_sum(void) {
    uint64_t sum = 0;
    for (int i = 0; i < 8; i++) {
        sum += atomic_load_explicit(&g_ss_live_by_class[i], memory_order_relaxed);
    }
    return sum;
}
// Gate for allocating a new Superslab of Tiny class `class_idx`.
// Returns true when allocation may proceed, false when a per-class or the
// global budget cap is exhausted (cap value 0 = unlimited). Classes outside
// 0..7 are never gated here (not Tiny). The first few denials of each kind
// are logged to stderr for diagnosis.
bool ss_budget_on_alloc(int class_idx) {
    ss_budget_init_once();
    if (class_idx < 0 || class_idx >= 8) {
        return true; // outside Tiny; do not gate here
    }
    // Per-class cap check (relaxed read; best-effort count).
    uint64_t live_cls = atomic_load_explicit(&g_ss_live_by_class[class_idx],
                                             memory_order_relaxed);
    int class_cap = g_ss_budget_per_class[class_idx];
    if (class_cap > 0 && live_cls >= (uint64_t)class_cap) {
        // Log only the first 4 per-class denials process-wide.
        static _Atomic uint32_t log_once = 0;
        if (atomic_fetch_add_explicit(&log_once, 1, memory_order_relaxed) < 4) {
            fprintf(stderr,
                    "[SS_BUDGET_DENY] class=%d live=%llu cap=%d\n",
                    class_idx,
                    (unsigned long long)live_cls,
                    class_cap);
        }
        return false;
    }
    // Global cap check: sums live counts across all classes.
    int global_cap = g_ss_budget_global;
    if (global_cap > 0) {
        uint64_t live_total = ss_budget_global_live_sum();
        if (live_total >= (uint64_t)global_cap) {
            // Log only the first 4 global denials process-wide.
            static _Atomic uint32_t g_log_once = 0;
            if (atomic_fetch_add_explicit(&g_log_once, 1, memory_order_relaxed) < 4) {
                fprintf(stderr,
                        "[SS_BUDGET_DENY_GLOBAL] live_total=%llu cap=%d class=%d\n",
                        (unsigned long long)live_total,
                        global_cap,
                        class_idx);
            }
            return false;
        }
    }
    return true;
}
// Free-side hook, currently a no-op beyond ensuring init: live counters are
// maintained by ss_stats_on_ss_free_class(). Kept for symmetry and future
// accounting.
void ss_budget_on_free(int class_idx) {
    (void)class_idx;
    ss_budget_init_once();
    // We currently rely on ss_stats_on_ss_free_class() to update live counters.
}

19
core/box/ss_budget_box.h Normal file
View File

@ -0,0 +1,19 @@
// ss_budget_box.h - Superslab Budget Box
// Box Theory: centralize budget/limit checks for Superslab allocations.
// Responsibilities:
// - Read budget ENV once (global + per-class override)
// - Provide cheap checks before allocating new Superslabs
// - Allow symmetric free hook for future accounting
#ifndef HAKMEM_SS_BUDGET_BOX_H
#define HAKMEM_SS_BUDGET_BOX_H
#include <stdbool.h>
// Return false when allocation should be denied due to budget exhaustion.
bool ss_budget_on_alloc(int class_idx);
// Hook for future bookkeeping; currently a no-op placeholder.
void ss_budget_on_free(int class_idx);
#endif // HAKMEM_SS_BUDGET_BOX_H

View File

@ -13,12 +13,15 @@ static inline void ss_slab_reset_meta_for_tiny(SuperSlab* ss,
if (!ss) return;
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) return;
// class_idx < 0 means "unassigned" (255). Otherwise keep the requested class.
uint8_t target_class = (class_idx < 0) ? 255u : (uint8_t)class_idx;
TinySlabMeta* meta = &ss->slabs[slab_idx];
meta->used = 0;
meta->carved = 0;
meta->freelist = NULL;
meta->class_idx = (uint8_t)class_idx;
ss->class_map[slab_idx] = (uint8_t)class_idx;
meta->class_idx = target_class;
ss->class_map[slab_idx] = target_class;
// Reset remote queue state to avoid stale pending frees on reuse.
atomic_store_explicit(&ss->remote_heads[slab_idx], 0, memory_order_relaxed);

View File

@ -1,8 +1,10 @@
// ss_stats_box.c - SuperSlab Statistics Box Implementation
#include "ss_stats_box.h"
#include <stdbool.h>
#include "../superslab/superslab_inline.h"
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
// ============================================================================
// Global Statistics State
@ -30,6 +32,11 @@ _Atomic uint64_t g_free_ss_enter = 0; // hak_tiny_free_superslab() entr
_Atomic uint64_t g_free_local_box_calls = 0; // same-thread freelist pushes
_Atomic uint64_t g_free_remote_box_calls = 0; // cross-thread remote pushes
// Superslab/slab observability (Tiny-only; relaxed updates)
_Atomic uint64_t g_ss_live_by_class[8] = {0};
_Atomic uint64_t g_ss_empty_events[8] = {0};
_Atomic uint64_t g_slab_live_events[8] = {0};
// ============================================================================
// Statistics Update Implementation
// ============================================================================
@ -56,6 +63,36 @@ void ss_stats_cache_store(void) {
pthread_mutex_unlock(&g_superslab_lock);
}
// Count one newly allocated Superslab for Tiny class `class_idx`.
// Out-of-range classes are ignored.
void ss_stats_on_ss_alloc_class(int class_idx) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }
    atomic_fetch_add_explicit(&g_ss_live_by_class[class_idx], 1,
                              memory_order_relaxed);
}
void ss_stats_on_ss_free_class(int class_idx) {
if (class_idx >= 0 && class_idx < 8) {
// Saturating-style decrement to avoid underflow from mismatched hooks
uint64_t prev = atomic_load_explicit(&g_ss_live_by_class[class_idx], memory_order_relaxed);
if (prev > 0) {
atomic_fetch_sub_explicit(&g_ss_live_by_class[class_idx], 1, memory_order_relaxed);
}
}
}
// Record scan observations for one Superslab of Tiny class `class_idx`:
// adds `slab_live` to the live-slab event counter when positive, and bumps
// the empty-Superslab counter when `is_empty` is nonzero. Out-of-range
// classes are ignored. All updates are relaxed (event-style counters).
void ss_stats_on_ss_scan(int class_idx, int slab_live, int is_empty) {
    if (class_idx < 0 || class_idx >= 8) {
        return;
    }
    if (slab_live > 0) {
        atomic_fetch_add_explicit(&g_slab_live_events[class_idx],
                                  (uint64_t)slab_live,
                                  memory_order_relaxed);
    }
    if (is_empty) {
        atomic_fetch_add_explicit(&g_ss_empty_events[class_idx], 1, memory_order_relaxed);
    }
}
// ============================================================================
// Statistics Reporting Implementation
// ============================================================================
@ -92,3 +129,23 @@ void superslab_print_global_stats(void) {
printf("Total bytes allocated: %lu MB\n", g_bytes_allocated / (1024 * 1024));
pthread_mutex_unlock(&g_superslab_lock);
}
// Dump coarse per-class Superslab counters to stderr when the
// HAKMEM_SS_STATS_DUMP environment variable is set to a non-"0" value.
// Classes whose counters are all zero are skipped.
// NOTE(review): despite the "once" wording in the header comment, this
// prints on every call while the env var is set — confirm callers invoke
// it only once (e.g. at exit).
void ss_stats_dump_if_requested(void) {
    const char* env = getenv("HAKMEM_SS_STATS_DUMP");
    if (!env || !*env || *env == '0') {
        return;
    }
    fprintf(stderr, "[SS_STATS] class live empty_events slab_live_events\n");
    for (int c = 0; c < 8; c++) {
        uint64_t live = atomic_load_explicit(&g_ss_live_by_class[c], memory_order_relaxed);
        uint64_t empty = atomic_load_explicit(&g_ss_empty_events[c], memory_order_relaxed);
        uint64_t slab_live = atomic_load_explicit(&g_slab_live_events[c], memory_order_relaxed);
        if (live || empty || slab_live) {
            fprintf(stderr, "  C%d: live=%llu empty=%llu slab_live=%llu\n",
                    c,
                    (unsigned long long)live,
                    (unsigned long long)empty,
                    (unsigned long long)slab_live);
        }
    }
}

View File

@ -43,6 +43,16 @@ extern _Atomic uint64_t g_free_ss_enter;
extern _Atomic uint64_t g_free_local_box_calls;
extern _Atomic uint64_t g_free_remote_box_calls;
// ============================================================================
// Superslab / Slab live-state observability (Tiny classes 0..7)
// ============================================================================
// NOTE: These are “event-style” counters updated at key transitions
// (alloc/free/reset) to keep overhead minimal. They are intended for
// regression detection and coarse budgeting rather than exact gauges.
extern _Atomic uint64_t g_ss_live_by_class[8]; // +1 on alloc, -1 on free (best-effort)
extern _Atomic uint64_t g_ss_empty_events[8]; // Observations of fully-empty Superslabs
extern _Atomic uint64_t g_slab_live_events[8]; // Observations of live slabs during scans
// ============================================================================
// Statistics Update API
// ============================================================================
@ -59,6 +69,11 @@ void ss_stats_cache_reuse(void);
// Thread-safe: mutex protected
void ss_stats_cache_store(void);
// Event-style observability helpers (Tiny classes only, relaxed atomics)
void ss_stats_on_ss_alloc_class(int class_idx);
void ss_stats_on_ss_free_class(int class_idx);
void ss_stats_on_ss_scan(int class_idx, int slab_live, int is_empty);
// ============================================================================
// Statistics Reporting API
// ============================================================================
@ -69,4 +84,7 @@ void superslab_print_stats(SuperSlab* ss);
// Print global SuperSlab statistics
void superslab_print_global_stats(void);
// ENV: HAKMEM_SS_STATS_DUMP=1 → dump coarse Superslab/slab counters once
void ss_stats_dump_if_requested(void);
#endif // HAKMEM_SS_STATS_BOX_H

View File

@ -119,7 +119,7 @@ static inline int ss_tls_bind_one(int class_idx,
tls->slab_base = tiny_slab_base_for(ss, slab_idx);
// Notify Tiny Page Box (if enabled for this class)
tiny_page_box_on_new_slab(tls);
tiny_page_box_on_new_slab(class_idx, tls);
// Sanity check: TLS must now describe this slab for this class.
// On failure, revert TLS to safe state and return 0.

143
core/box/super_reg_box.c Normal file
View File

@ -0,0 +1,143 @@
#include "super_reg_box.h"
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hakmem_super_registry.h"
// プロファイル別の実容量・論理上限
static _Atomic int g_super_reg_effective_size = SUPER_REG_SIZE;
static _Atomic int g_super_reg_effective_mask = SUPER_REG_MASK;
static _Atomic int g_super_reg_effective_per_class = SUPER_REG_PER_CLASS;
static _Atomic int g_super_reg_profile_inited = 0;
// 動的に確保する実配列
static SuperRegEntry* g_super_reg_entries = NULL;
static SuperSlab** g_super_reg_by_class_slots = NULL;
static int g_super_reg_by_class_stride = SUPER_REG_PER_CLASS;
static _Atomic int g_super_reg_allocated = 0;
// Round `requested` down to a power of two within (0, SUPER_REG_SIZE].
// Out-of-range requests yield `fallback` unchanged; in-range results are
// floored at 1024 entries.
static inline int super_reg_clamp_power_of_two(int requested, int fallback) {
    if (requested <= 0 || requested > SUPER_REG_SIZE) {
        return fallback;
    }
    // Smear the highest set bit downward, then keep only that bit
    // (round down to a power of two).
    int v = requested;
    for (int shift = 1; shift <= 16; shift <<= 1) {
        v |= v >> shift;
    }
    v -= v >> 1;
    // Enforce a floor of 1024 effective entries.
    return (v < 1024) ? 1024 : v;
}
// Decide the effective registry capacity once, from the profile string (or
// HAKMEM_PROFILE when NULL). "bench" logically shrinks the table to 1/8 and
// the per-class stride to 1/16; the compile-time constants stay untouched.
static void super_reg_apply_profile(const char* profile) {
    if (g_super_reg_profile_inited) {
        return;
    }
    const char* env_profile = profile ? profile : getenv("HAKMEM_PROFILE");
    const int is_bench = (env_profile && strcmp(env_profile, "bench") == 0);
    int eff_size = SUPER_REG_SIZE;
    int eff_per_class = SUPER_REG_PER_CLASS;
    if (is_bench) {
        // Shrink only the logical usable range (arrays keep original sizes).
        eff_size = SUPER_REG_SIZE >> 3;           // limit to 1/8
        eff_per_class = SUPER_REG_PER_CLASS >> 4; // 1/16
    }
    eff_size = super_reg_clamp_power_of_two(eff_size, SUPER_REG_SIZE);
    eff_per_class = eff_per_class > 0 ? eff_per_class : SUPER_REG_PER_CLASS;
    atomic_store_explicit(&g_super_reg_effective_size, eff_size, memory_order_relaxed);
    atomic_store_explicit(&g_super_reg_effective_mask, eff_size - 1, memory_order_relaxed);
    atomic_store_explicit(&g_super_reg_effective_per_class,
                          eff_per_class,
                          memory_order_relaxed);
    atomic_store_explicit(&g_super_reg_profile_inited, 1, memory_order_release);
}
// Allocate the registry table and the per-class slot table sized by the
// effective profile. `box` is a placeholder handle; `profile` overrides
// HAKMEM_PROFILE when non-NULL. Aborts on allocation failure.
// NOTE(review): two threads racing past the g_super_reg_allocated check can
// both calloc and overwrite the pointers (leaking one set) — confirm
// first-time init is serialized by callers.
void super_reg_init(SuperRegBox* box, const char* profile) {
    (void)box;
    super_reg_apply_profile(profile);
    if (atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        return;
    }
    int eff_size = super_reg_effective_size();
    int per_class = super_reg_effective_per_class();
    // Allocate registry table
    size_t reg_bytes = (size_t)eff_size * sizeof(SuperRegEntry);
    g_super_reg_entries = (SuperRegEntry*)calloc(eff_size, sizeof(SuperRegEntry));
    if (!g_super_reg_entries) {
        fprintf(stderr, "[SUPER_REG] failed to allocate %zu bytes for registry\n", reg_bytes);
        abort();
    }
    // Allocate per-class table (contiguous 1D block, indexed class*stride+i)
    size_t per_class_bytes = (size_t)TINY_NUM_CLASSES * (size_t)per_class * sizeof(SuperSlab*);
    g_super_reg_by_class_slots = (SuperSlab**)calloc(TINY_NUM_CLASSES * (size_t)per_class,
                                                     sizeof(SuperSlab*));
    if (!g_super_reg_by_class_slots) {
        fprintf(stderr, "[SUPER_REG] failed to allocate %zu bytes for per-class registry\n",
                per_class_bytes);
        abort();
    }
    g_super_reg_by_class_stride = per_class;
    atomic_store_explicit(&g_super_reg_allocated, 1, memory_order_release);
}
// Effective registry slot count; lazily applies the profile on first use.
int super_reg_effective_size(void) {
    if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
        super_reg_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_super_reg_effective_size, memory_order_relaxed);
}

// Index mask (effective size - 1).
int super_reg_effective_mask(void) {
    if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
        super_reg_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_super_reg_effective_mask, memory_order_relaxed);
}

// Effective per-class slot count (stride of the per-class table).
int super_reg_effective_per_class(void) {
    if (!atomic_load_explicit(&g_super_reg_profile_inited, memory_order_acquire)) {
        super_reg_apply_profile(NULL);
    }
    return atomic_load_explicit(&g_super_reg_effective_per_class, memory_order_relaxed);
}

// Accessors below lazily allocate the backing arrays on first use.
// NOTE(review): concurrent first callers can race into super_reg_init();
// confirm first touch is serialized.
SuperRegEntry* super_reg_entries(void) {
    if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        super_reg_init(NULL, NULL);
    }
    return g_super_reg_entries;
}

SuperSlab** super_reg_by_class_slots(void) {
    if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        super_reg_init(NULL, NULL);
    }
    return g_super_reg_by_class_slots;
}

int super_reg_by_class_stride(void) {
    if (!atomic_load_explicit(&g_super_reg_allocated, memory_order_acquire)) {
        super_reg_init(NULL, NULL);
    }
    return g_super_reg_by_class_stride;
}

77
core/box/super_reg_box.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#ifndef TINY_NUM_CLASSES
#define TINY_NUM_CLASSES 8
#endif

// SuperRegBox (design notes / API stub)
// -------------------------------------
// Role:
// - Break the direct dependency on g_super_reg / g_super_reg_by_class so the
//   registry capacity can switch per profile (full/prod/bench/larson_guard).
// - Capacity decisions, allocation, and teardown stay inside the box; callers
//   only use the thin API below.
//
// Profile policy (proposal):
// - full/prod : keep the current SUPER_REG_SIZE (=1,048,576) and
//               SUPER_REG_PER_CLASS (=16,384)
// - bench     : shrink SUPER_REG_SIZE to ~1/16-1/8 (e.g. 65,536), per-class
//               to something like 1,024
// - guard     : bench-sized or smaller, preferring fail-fast (ENOMEM)
//
// Thread safety:
// - Reuses the existing lock/atomic scheme; an "initialized?" flag gates
//   lazy setup.
//
// Planned API (implementation to follow):
typedef struct SuperSlab SuperSlab;
typedef struct SuperRegBox SuperRegBox;
struct SuperRegEntry;

// Decide capacity from profile/ENV and allocate the internal arrays.
// When profile is NULL, HAKMEM_PROFILE (bench / full / ...) is consulted.
void super_reg_init(SuperRegBox* box, const char* profile);

// Currently effective slot count / mask / per-class stride.
int super_reg_effective_size(void);
int super_reg_effective_mask(void);
int super_reg_effective_per_class(void);

// Access to the registry storage (dynamically allocated inside the box).
struct SuperRegEntry* super_reg_entries(void);
SuperSlab** super_reg_by_class_slots(void);
int super_reg_by_class_stride(void);

// Bounds-checked read of the per-class table; returns NULL for any
// out-of-range class/index or when the table is unavailable.
static inline SuperSlab* super_reg_by_class_at(int class_idx, int idx) {
    SuperSlab** slots = super_reg_by_class_slots();
    int stride = super_reg_by_class_stride();
    if (!slots || stride <= 0 || class_idx < 0 || idx < 0 ||
        class_idx >= TINY_NUM_CLASSES || idx >= stride) {
        return NULL;
    }
    return slots[class_idx * stride + idx];
}

// Bounds-checked write into the per-class table; silently ignores
// out-of-range class/index.
static inline void super_reg_by_class_set(int class_idx, int idx, SuperSlab* ss) {
    SuperSlab** slots = super_reg_by_class_slots();
    int stride = super_reg_by_class_stride();
    if (!slots || stride <= 0 || class_idx < 0 || idx < 0 ||
        class_idx >= TINY_NUM_CLASSES || idx >= stride) {
        return;
    }
    slots[class_idx * stride + idx] = ss;
}

// Register/unregister a Superslab (wraps the existing
// hak_super_register/unregister equivalents inside the box).
bool super_reg_register(SuperRegBox* box, SuperSlab* ss, uint32_t class_idx);
void super_reg_unregister(SuperRegBox* box, SuperSlab* ss, uint32_t class_idx);

// Address lookup / per-class iteration (minimal thin API).
SuperSlab* super_reg_find_by_addr(SuperRegBox* box, void* ptr);
SuperSlab* super_reg_iter_for_class(SuperRegBox* box, uint32_t class_idx, void** cursor);

// Future memory-reduction ideas (comment only):
// - Allocate g_super_reg/g_super_reg_by_class per profile via malloc/mmap,
//   detaching them from BSS.
// - In the bench profile, shrink the fixed sizes drastically and fail fast
//   (ENOMEM) when exhausted.
// - In prod, keep current sizes but allow access only through the box boundary.
// Forward-declaration note: SuperSlab is already declared above.

View File

@ -0,0 +1,63 @@
// Experimental C7-only hot path. Enabled solely via HAKMEM_TINY_C7_HOT=1;
// when unset (or 0) callers fall back completely to the conventional path.
// This is an A/B switch that is not meant to be ON by default in production.
#pragma once

#include "../hakmem_build_flags.h"
#include "c7_hotpath_env_box.h"
#include "tiny_c7_uc_hit_box.h"
#include "tiny_c7_warm_spill_box.h"
#include "tiny_c7_stats_sample_box.h"
#include "tiny_front_hot_box.h"
#include "tiny_front_cold_box.h"
#include "front_gate_box.h"
#include "tls_sll_box.h"
#include "ptr_conversion_box.h"

// C7 alloc hot path.
// Order:
//   1) Peek TLS/SFC first (front_gate_try_pop)
//   2) Unified Cache hit-only path tiny_uc_pop_c7_hit_only()
//   3) Otherwise fall back to the normal cold refill (refill and statistics
//      are the cold side's responsibility)
static inline void* tiny_c7_alloc_hot(size_t size) {
    (void)size; // unused: callers guarantee class_idx == 7
    void* user = NULL;
    // 1) Direct SFC/TLS SLL pop (yields a user pointer)
    if (front_gate_try_pop(/*class_idx=*/7, &user)) {
        return user;
    }
    // 2) Unified Cache hit
    user = tiny_uc_pop_c7_hit_only();
    if (__builtin_expect(user != NULL, 1)) {
        return user;
    }
    // 3) Fall back to cold refill
    return tiny_cold_refill_and_alloc(7);
}

// C7 free hot path. Takes a BASE pointer; tries TLS first, then UC.
static inline int tiny_c7_free_hot(void* base) {
    // 1) Push directly onto the TLS SLL (pass BASE through unchanged)
    extern int g_tls_sll_enable;
    if (__builtin_expect(g_tls_sll_enable, 1)) {
        if (tls_sll_push(7, HAK_BASE_FROM_RAW(base), UINT32_MAX)) {
            return 1;
        }
    }
    // 2) Push into the Unified Cache (lightweight hit-only variant)
    if (tiny_uc_push_c7_hot(base)) {
        return 1;
    }
    // 3) Warm spill (hook for future use)
    if (tiny_c7_warm_spill_one(base)) {
        return 1;
    }
    // 4) Finally fall back to the cold free path
    return tiny_cold_drain_and_free(7, base);
}

View File

@ -0,0 +1,9 @@
// tiny_c7_stats_sample_box.h - lightweight sampling helper for C7 stats.
// Currently a simple 1-in-16 sampler; exists so the hot path does not need
// #if blocks around statistics code.
#pragma once

// Returns 1 on every 16th call per thread (calls 16, 32, 48, ...), else 0.
static inline int tiny_c7_stats_sample(void) {
    static __thread unsigned tick = 0;
    return ((++tick) & 0xFu) == 0u;
}

View File

@ -0,0 +1,58 @@
// tiny_c7_uc_hit_box.h - C7-only Unified Cache hit-only helpers.
// Contract: act only on cache hits. On a miss, return NULL/0 and perform
// neither refill nor statistics updates.
#pragma once

#include "../front/tiny_unified_cache.h"
#include "tiny_layout_box.h"

// Hit-only pop from the C7 Unified Cache. Returns a user pointer on hit
// (class header stamped when headers are enabled), NULL when the ring is
// empty or not yet initialized.
static inline void* tiny_uc_pop_c7_hit_only(void) {
    TinyUnifiedCache* cache = &g_unified_cache[7];
#if !HAKMEM_TINY_FRONT_PGO
    // Lazy init unless a PGO build guarantees the cache is ready.
    if (__builtin_expect(cache->slots == NULL, 0)) {
        unified_cache_init();
        if (cache->slots == NULL) {
            return NULL;
        }
    }
#endif
    if (__builtin_expect(cache->head == cache->tail, 0)) {
        return NULL; // empty ring
    }
    void* base = cache->slots[cache->head];
    cache->head = (cache->head + 1) & cache->mask;
#if HAKMEM_TINY_HEADER_CLASSIDX
    // Stamp the class header and convert BASE -> user pointer.
    tiny_region_id_write_header(base, 7);
    size_t user_offset = tiny_user_offset(7);
    return (void*)((char*)base + user_offset);
#else
    return base;
#endif
}

// Hit-only push of a BASE pointer into the C7 Unified Cache.
// Returns 1 on success, 0 when the ring is full or unavailable.
static inline int tiny_uc_push_c7_hot(void* base) {
    TinyUnifiedCache* cache = &g_unified_cache[7];
#if !HAKMEM_TINY_FRONT_PGO
    if (__builtin_expect(cache->slots == NULL, 0)) {
        unified_cache_init();
        if (cache->slots == NULL) {
            return 0;
        }
    }
#endif
    uint16_t next_tail = (cache->tail + 1) & cache->mask;
    if (__builtin_expect(next_tail == cache->head, 0)) {
        return 0; // full
    }
    cache->slots[cache->tail] = base;
    cache->tail = next_tail;
    return 1;
}

View File

@ -0,0 +1,9 @@
// tiny_c7_warm_spill_box.h - C7 Warm spill hook (placeholder)
// Purpose: allow swapping spill実装 without touchingホットパス。
#pragma once
// Currently a no-op; replace this when a real warm-spill implementation lands.
// Returns 0 ("not handled") so callers always continue to the fallback path.
static inline int tiny_c7_warm_spill_one(void* base) {
    (void)base;
    return 0;
}

View File

@ -6,17 +6,20 @@
#include <string.h>
#include <strings.h>
#include "tiny_policy_learner_box.h"
#include "tiny_mem_stats_box.h"
TinyClassPolicy g_tiny_class_policy[TINY_NUM_CLASSES];
static _Atomic int g_tiny_class_policy_init_done = 0;
static _Atomic int g_tiny_class_policy_logged = 0;
static _Atomic int g_tiny_class_policy_profile_auto = 0;
static _Atomic int g_tiny_class_policy_mem_recorded = 0;
// Build the default per-class policy entry: every feature knob disabled and
// warm cap 0. Zero-init covers all fields; the explicit stores document intent.
static inline TinyClassPolicy tiny_class_policy_default_entry(void) {
    TinyClassPolicy entry = {0};
    entry.warm_enabled = 0;
    entry.warm_cap = 0;
    entry.page_box_enabled = 0;
    entry.tls_carve_enabled = 0;
    return entry;
}
@ -30,6 +33,7 @@ static void tiny_class_policy_set_legacy(void) {
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
g_tiny_class_policy[i].warm_enabled = 1;
g_tiny_class_policy[i].warm_cap = (i < 5) ? 4 : 8;
g_tiny_class_policy[i].tls_carve_enabled = (i >= 5) ? 1 : 0;
}
for (int i = 5; i < TINY_NUM_CLASSES; i++) {
g_tiny_class_policy[i].page_box_enabled = 1;
@ -45,6 +49,7 @@ static void tiny_class_policy_set_c5_7_only(void) {
g_tiny_class_policy[i].page_box_enabled = 1;
g_tiny_class_policy[i].warm_enabled = 1;
g_tiny_class_policy[i].warm_cap = 8;
g_tiny_class_policy[i].tls_carve_enabled = 1;
}
}
@ -53,6 +58,18 @@ static void tiny_class_policy_set_tinyplus_all(void) {
tiny_class_policy_set_legacy();
}
// "larson_guard" profile: legacy settings with warm caps tightened
// (small classes 2, larger classes 4) to reduce RSS under larson-style loads.
static void tiny_class_policy_set_larson_guard(void) {
    tiny_class_policy_set_legacy();
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        g_tiny_class_policy[cls].warm_cap = (cls < 5) ? 2 : 4;
    }
}
static void tiny_class_policy_set_auto(void) {
// auto プロファイルは legacy をベースにして、後段の learner に委譲
tiny_class_policy_set_legacy();
@ -72,6 +89,10 @@ static const char* tiny_class_policy_set_profile(const char* profile) {
tiny_class_policy_set_tinyplus_all();
atomic_store_explicit(&g_tiny_class_policy_profile_auto, 0, memory_order_release);
return "tinyplus_all";
} else if (strcasecmp(profile, "larson_guard") == 0) {
tiny_class_policy_set_larson_guard();
atomic_store_explicit(&g_tiny_class_policy_profile_auto, 0, memory_order_release);
return "larson_guard";
} else if (strcasecmp(profile, "auto") == 0) {
tiny_class_policy_set_auto();
return "auto";
@ -84,16 +105,20 @@ static const char* tiny_class_policy_set_profile(const char* profile) {
}
void tiny_class_policy_dump(const char* tag) {
if (!tiny_policy_log_enabled()) {
return;
}
const char* header = tag ? tag : "[POLICY_DUMP]";
fprintf(stderr, "%s\n", header);
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
TinyClassPolicy* p = &g_tiny_class_policy[cls];
fprintf(stderr,
" C%d: page=%u warm=%u cap=%u\n",
" C%d: page=%u warm=%u cap=%u tls_carve=%u\n",
cls,
p->page_box_enabled,
p->warm_enabled,
p->warm_cap);
p->warm_cap,
p->tls_carve_enabled);
}
}
@ -105,8 +130,13 @@ void tiny_class_policy_init_once(void) {
const char* profile = getenv("HAKMEM_TINY_POLICY_PROFILE");
const char* active_profile = tiny_class_policy_set_profile(profile);
if (atomic_exchange_explicit(&g_tiny_class_policy_mem_recorded, 1, memory_order_acq_rel) == 0) {
tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_policy));
}
// 1-shot ダンプでポリシーの内容を可視化(デバッグ用)
if (atomic_exchange_explicit(&g_tiny_class_policy_logged, 1, memory_order_acq_rel) == 0) {
if (tiny_policy_log_enabled() &&
atomic_exchange_explicit(&g_tiny_class_policy_logged, 1, memory_order_acq_rel) == 0) {
fprintf(stderr, "[POLICY_INIT] profile=%s\n", active_profile);
tiny_class_policy_dump(NULL);
}
@ -121,3 +151,8 @@ void tiny_class_policy_refresh_auto(void) {
}
tiny_policy_learner_tick();
}
// Report whether the active policy profile is "auto" (learner-managed).
// Runs init_once first so the ENV-selected profile has taken effect; the
// acquire load pairs with the release stores done during profile selection.
int tiny_class_policy_is_auto(void) {
    tiny_class_policy_init_once();
    return atomic_load_explicit(&g_tiny_class_policy_profile_auto, memory_order_acquire);
}

View File

@ -15,23 +15,37 @@
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
typedef struct TinyClassPolicy {
uint8_t page_box_enabled; // Enable Tiny Page Box for this class
uint8_t warm_enabled; // Enable Warm Pool for this class
uint8_t warm_cap; // Max warm SuperSlabs to keep (per-thread)
uint8_t reserved;
uint8_t tls_carve_enabled; // Enable Warm→TLS carve experiment for this class
} TinyClassPolicy;
extern TinyClassPolicy g_tiny_class_policy[TINY_NUM_CLASSES];
// ENV-gated policy logging. Default is OFF: logging is enabled only when
// HAKMEM_TINY_POLICY_LOG is set, non-empty, and does not start with '0'.
// The result is cached in a local static, so the ENV is read at most once.
static inline int tiny_policy_log_enabled(void) {
    static int g_policy_log = -1;
    if (__builtin_expect(g_policy_log == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_POLICY_LOG");
        g_policy_log = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_policy_log;
}
// Initialize policy table once (idempotent).
void tiny_class_policy_init_once(void);
// Refresh auto profile based on learner output (no-op for non-auto profiles)
void tiny_class_policy_refresh_auto(void);
// True when active profile is "auto" (learner-managed)
int tiny_class_policy_is_auto(void);
// Debug helper: dump current policy (tag optional)
void tiny_class_policy_dump(const char* tag);

View File

@ -1,6 +1,7 @@
// tiny_class_stats_box.c - Thread-local stats storage for Tiny classes
#include "tiny_class_stats_box.h"
#include "tiny_mem_stats_box.h"
#include <stdio.h>
#include <string.h>
@ -8,6 +9,20 @@ __thread TinyClassStatsThread g_tiny_class_stats = {0};
_Atomic uint64_t g_tiny_class_stats_uc_miss_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_warm_hit_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_shared_lock_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_tls_carve_attempt_global[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_class_stats_tls_carve_success_global[TINY_NUM_CLASSES] = {0};
static _Atomic int g_tiny_class_stats_mem_recorded = 0;
// Account the stats tables' memory footprint exactly once per process.
// atomic_exchange acts as a one-shot gate: only the first caller proceeds.
static void tiny_class_stats_record_mem_once(void) {
    if (atomic_exchange_explicit(&g_tiny_class_stats_mem_recorded, 1,
                                 memory_order_acq_rel) != 0) {
        return; // already recorded by an earlier caller
    }
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_uc_miss_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_warm_hit_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_shared_lock_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_tls_carve_attempt_global));
    tiny_mem_stats_add_policy_stats((ssize_t)sizeof(g_tiny_class_stats_tls_carve_success_global));
}
void tiny_class_stats_reset_thread(void) {
memset(&g_tiny_class_stats, 0, sizeof(g_tiny_class_stats));
@ -15,11 +30,13 @@ void tiny_class_stats_reset_thread(void) {
// Copy this thread's counters into *out. A NULL destination is a no-op.
// Also records the stats tables' memory footprint on first use.
void tiny_class_stats_snapshot_thread(TinyClassStatsThread* out) {
    if (out == NULL) {
        return;
    }
    tiny_class_stats_record_mem_once();
    memcpy(out, &g_tiny_class_stats, sizeof(*out));
}
void tiny_class_stats_snapshot_global(TinyClassStatsThread* out) {
if (!out) return;
tiny_class_stats_record_mem_once();
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
out->uc_miss[i] = atomic_load_explicit(&g_tiny_class_stats_uc_miss_global[i],
memory_order_relaxed);
@ -27,6 +44,10 @@ void tiny_class_stats_snapshot_global(TinyClassStatsThread* out) {
memory_order_relaxed);
out->shared_lock[i] = atomic_load_explicit(&g_tiny_class_stats_shared_lock_global[i],
memory_order_relaxed);
out->tls_carve_attempt[i] = atomic_load_explicit(
&g_tiny_class_stats_tls_carve_attempt_global[i], memory_order_relaxed);
out->tls_carve_success[i] = atomic_load_explicit(
&g_tiny_class_stats_tls_carve_success_global[i], memory_order_relaxed);
}
}
@ -34,14 +55,18 @@ static void tiny_class_stats_dump_common(FILE* out,
const char* tag,
const TinyClassStatsThread* stats) {
if (!(out && stats)) return;
fprintf(out, "%s class uc_miss warm_hit shared_lock\n", tag ? tag : "[STATS]");
fprintf(out, "%s class uc_miss warm_hit shared_lock tls_carve_attempt tls_carve_success\n",
tag ? tag : "[STATS]");
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
if (stats->uc_miss[c] || stats->warm_hit[c] || stats->shared_lock[c]) {
fprintf(out, " C%d: %llu %llu %llu\n",
if (stats->uc_miss[c] || stats->warm_hit[c] || stats->shared_lock[c] ||
stats->tls_carve_attempt[c] || stats->tls_carve_success[c]) {
fprintf(out, " C%d: %llu %llu %llu %llu %llu\n",
c,
(unsigned long long)stats->uc_miss[c],
(unsigned long long)stats->warm_hit[c],
(unsigned long long)stats->shared_lock[c]);
(unsigned long long)stats->shared_lock[c],
(unsigned long long)stats->tls_carve_attempt[c],
(unsigned long long)stats->tls_carve_success[c]);
}
}
}

View File

@ -16,6 +16,8 @@ typedef struct TinyClassStatsThread {
uint64_t uc_miss[TINY_NUM_CLASSES]; // unified_cache_refill() hits
uint64_t warm_hit[TINY_NUM_CLASSES]; // warm pool successes
uint64_t shared_lock[TINY_NUM_CLASSES]; // shared pool lock acquisitions (hook as needed)
uint64_t tls_carve_attempt[TINY_NUM_CLASSES]; // Warm/TLS carve attempts
uint64_t tls_carve_success[TINY_NUM_CLASSES]; // Warm/TLS carve successes
} TinyClassStatsThread;
extern __thread TinyClassStatsThread g_tiny_class_stats;
@ -24,6 +26,8 @@ extern __thread TinyClassStatsThread g_tiny_class_stats;
extern _Atomic uint64_t g_tiny_class_stats_uc_miss_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_warm_hit_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_shared_lock_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_tls_carve_attempt_global[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tiny_class_stats_tls_carve_success_global[TINY_NUM_CLASSES];
static inline void tiny_class_stats_on_uc_miss(int ci) {
if (ci >= 0 && ci < TINY_NUM_CLASSES) {
@ -49,6 +53,22 @@ static inline void tiny_class_stats_on_shared_lock(int ci) {
}
}
static inline void tiny_class_stats_on_tls_carve_attempt(int ci) {
if (ci >= 0 && ci < TINY_NUM_CLASSES) {
g_tiny_class_stats.tls_carve_attempt[ci]++;
atomic_fetch_add_explicit(&g_tiny_class_stats_tls_carve_attempt_global[ci],
1, memory_order_relaxed);
}
}
static inline void tiny_class_stats_on_tls_carve_success(int ci) {
if (ci >= 0 && ci < TINY_NUM_CLASSES) {
g_tiny_class_stats.tls_carve_success[ci]++;
atomic_fetch_add_explicit(&g_tiny_class_stats_tls_carve_success_global[ci],
1, memory_order_relaxed);
}
}
// Optional: reset per-thread counters (cold path only).
void tiny_class_stats_reset_thread(void);

View File

@ -0,0 +1,65 @@
// tiny_mem_stats_box.c - Memory accounting helpers for Tiny front components
#include "tiny_mem_stats_box.h"
#include <stdatomic.h>
#include <sys/types.h>
#include <stdio.h>
_Atomic long long g_tiny_mem_unified_cache_bytes = 0;
_Atomic long long g_tiny_mem_warm_pool_bytes = 0;
_Atomic long long g_tiny_mem_page_box_bytes = 0;
_Atomic long long g_tiny_mem_tls_magazine_bytes = 0;
_Atomic long long g_tiny_mem_policy_stats_bytes = 0;
static inline void tiny_mem_stats_add(_Atomic long long* target, ssize_t bytes) {
if (!target || bytes == 0) {
return;
}
atomic_fetch_add_explicit(target, (long long)bytes, memory_order_relaxed);
}
// Adjust the Unified Cache byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_unified(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_unified_cache_bytes, bytes);
}
// Adjust the Warm Pool byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_warm(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_warm_pool_bytes, bytes);
}
// Adjust the Page Box byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_pagebox(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_page_box_bytes, bytes);
}
// Adjust the TLS magazine byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_tls_magazine(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_tls_magazine_bytes, bytes);
}
// Adjust the policy/stats tables byte counter (positive = allocated, negative = freed).
void tiny_mem_stats_add_policy_stats(ssize_t bytes) {
    tiny_mem_stats_add(&g_tiny_mem_policy_stats_bytes, bytes);
}
// Emit a one-line summary of all tracked categories to stderr, in KB.
// Loads are relaxed snapshots; the totals may be slightly stale under
// concurrent updates, which is acceptable for a diagnostic dump.
void tiny_mem_stats_dump(void) {
    const long long uc = atomic_load_explicit(&g_tiny_mem_unified_cache_bytes,
                                              memory_order_relaxed);
    const long long wp = atomic_load_explicit(&g_tiny_mem_warm_pool_bytes,
                                              memory_order_relaxed);
    const long long pb = atomic_load_explicit(&g_tiny_mem_page_box_bytes,
                                              memory_order_relaxed);
    const long long tm = atomic_load_explicit(&g_tiny_mem_tls_magazine_bytes,
                                              memory_order_relaxed);
    const long long ps = atomic_load_explicit(&g_tiny_mem_policy_stats_bytes,
                                              memory_order_relaxed);
    fprintf(stderr,
            "[TINY_MEM_STATS] unified_cache=%lldKB warm_pool=%lldKB page_box=%lldKB "
            "tls_mag=%lldKB policy_stats=%lldKB total=%lldKB\n",
            uc / 1024,
            wp / 1024,
            pb / 1024,
            tm / 1024,
            ps / 1024,
            (uc + wp + pb + tm + ps) / 1024);
}

View File

@ -0,0 +1,38 @@
// tiny_mem_stats_box.h - Lightweight memory accounting for Tiny front boxes
//
// Purpose:
// - Provide coarse-grained byte counters for major Tiny front allocations
// (Unified Cache buffers, Warm Pool TLS state, Page Box TLS state,
// TLS magazine/front caches, and policy/stats tables).
// - Keep overhead near-zero: helpers are simple fetch-adds, typically called
// at init time when the structures are allocated.
//
// Usage:
// - Call tiny_mem_stats_add_*() at allocation/free sites (positive/negative).
// - Call tiny_mem_stats_dump() when HAKMEM_TINY_MEM_DUMP is set to emit one
// summary line to stderr (values reported in KB).
#ifndef TINY_MEM_STATS_BOX_H
#define TINY_MEM_STATS_BOX_H
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>
// Byte counters (signed to allow subtracting on free paths)
extern _Atomic long long g_tiny_mem_unified_cache_bytes;
extern _Atomic long long g_tiny_mem_warm_pool_bytes;
extern _Atomic long long g_tiny_mem_page_box_bytes;
extern _Atomic long long g_tiny_mem_tls_magazine_bytes;
extern _Atomic long long g_tiny_mem_policy_stats_bytes;
void tiny_mem_stats_add_unified(ssize_t bytes);
void tiny_mem_stats_add_warm(ssize_t bytes);
void tiny_mem_stats_add_pagebox(ssize_t bytes);
void tiny_mem_stats_add_tls_magazine(ssize_t bytes);
void tiny_mem_stats_add_policy_stats(ssize_t bytes);
// Dump one line summary (values in KB) if hooked by caller.
void tiny_mem_stats_dump(void);
#endif // TINY_MEM_STATS_BOX_H

View File

@ -1,6 +1,5 @@
#include "tiny_page_box.h"
// TLS state definitions for Tiny Page Box
__thread TinyPageBoxState g_tiny_page_box_state[TINY_NUM_CLASSES];
__thread TinyPageBoxContext g_tiny_page_box[TINY_NUM_CLASSES];
__thread int g_tiny_page_box_init_done = 0;

View File

@ -9,7 +9,7 @@
// - API is generic over class_idx (0-7), but enabled-classes are controlled
// by ENV so that we can start with C7 only and later extend to C5/C6.
// - When enabled for a class:
// tiny_page_box_refill(class_idx, out, max) will try to supply up to
// tiny_page_box_refill(class_idx, tls, out, max) will try to supply up to
// `max` BASE pointers using per-page freelist before falling back.
// - When disabled for a class: the box returns 0 and caller uses legacy path.
//
@ -37,6 +37,7 @@
#include "../superslab/superslab_types.h" // For TinySlabMeta, SuperSlab
#include "../box/tiny_next_ptr_box.h" // For tiny_next_read()
#include "../hakmem_tiny_superslab.h" // For tiny_stride_for_class(), base helpers, superslab_ref_inc/dec
#include "../box/tiny_mem_stats_box.h" // For coarse memory accounting
// Superslab active counterRelease Guard Box と整合性を取るためのカウンタ更新)
extern void ss_active_add(SuperSlab* ss, uint32_t n);
@ -61,19 +62,28 @@ typedef struct TinyPageDesc {
// - enabled: このクラスで Page Box を使うかどうか
// - num_pages: 現在保持しているページ数0〜TINY_PAGE_BOX_MAX_PAGES
// - pages[]: TLS が掴んだ C7/C5/C6 ページの ring小さなバッファ
typedef struct TinyPageBoxState {
typedef struct TinyPageBoxContext {
uint8_t enabled; // 1=Page Box enabled for this class, 0=disabled
uint8_t num_pages; // 有効な pages[] エントリ数
uint8_t _pad[2];
TinyPageDesc pages[TINY_PAGE_BOX_MAX_PAGES];
} TinyPageBoxState;
} TinyPageBoxContext;
// TLS/state: one TinyPageBoxState per classper-thread Box
extern __thread TinyPageBoxState g_tiny_page_box_state[TINY_NUM_CLASSES];
// TLS/state: one TinyPageBoxContext per classper-thread Box
extern __thread TinyPageBoxContext g_tiny_page_box[TINY_NUM_CLASSES];
// One-shot init guardper-thread
extern __thread int g_tiny_page_box_init_done;
// ENV gate for Page Box logging (enabled when HAKMEM_TINY_PAGEBOX_LOG is set,
// non-empty, and does not start with '0'). The verdict is computed once and
// cached in a function-local static so the hot path never re-reads the ENV.
static inline int tiny_page_box_log_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached < 0, 0)) {
        const char* env = getenv("HAKMEM_TINY_PAGEBOX_LOG");
        int on = 0;
        if (env != NULL && env[0] != '\0' && env[0] != '0') {
            on = 1;
        }
        cached = on;
    }
    return cached;
}
// Helper: parse class list from ENV and set enabled flags.
// Default behaviour (ENV unset/empty) is to enable class 7 only.
static inline void tiny_page_box_init_once(void) {
@ -82,13 +92,14 @@ static inline void tiny_page_box_init_once(void) {
}
// Clear all state
memset(g_tiny_page_box_state, 0, sizeof(g_tiny_page_box_state));
memset(g_tiny_page_box, 0, sizeof(g_tiny_page_box));
tiny_mem_stats_add_pagebox((ssize_t)sizeof(g_tiny_page_box));
const char* env = getenv("HAKMEM_TINY_PAGE_BOX_CLASSES");
if (!env || !*env) {
// Default: enable mid-size classes (C5C7)
for (int c = 5; c <= 7 && c < TINY_NUM_CLASSES; c++) {
g_tiny_page_box_state[c].enabled = 1;
g_tiny_page_box[c].enabled = 1;
}
} else {
// Parse simple comma-separated list of integers: "5,6,7"
@ -107,7 +118,7 @@ static inline void tiny_page_box_init_once(void) {
p++;
}
if (val >= 0 && val < TINY_NUM_CLASSES) {
g_tiny_page_box_state[val].enabled = 1;
g_tiny_page_box[val].enabled = 1;
}
}
}
@ -123,7 +134,7 @@ static inline int tiny_page_box_is_enabled(int class_idx) {
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return 0;
}
return g_tiny_page_box_state[class_idx].enabled != 0;
return g_tiny_page_box[class_idx].enabled != 0;
}
// Forward declaration for TLS slab statetiny_tls.h から参照)
@ -133,7 +144,7 @@ extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
// ここで Page Box が利用可能なページとして登録しておくことで、
// 後続の unified_cache_refill() から Superslab/Warm Pool に落ちる前に
// 「既に TLS が掴んでいるページ」を優先的に使えるようにする。
static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
static inline void tiny_page_box_on_new_slab(int class_idx, TinyTLSSlab* tls)
{
if (!tls) {
return;
@ -143,6 +154,10 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
tiny_page_box_init_once();
}
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return;
}
SuperSlab* ss = tls->ss;
TinySlabMeta* meta = tls->meta;
uint8_t* base = tls->slab_base;
@ -152,12 +167,11 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
return;
}
int class_idx = (int)meta->class_idx;
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
if (meta->class_idx != (uint8_t)class_idx) {
return;
}
TinyPageBoxState* st = &g_tiny_page_box_state[class_idx];
TinyPageBoxContext* st = &g_tiny_page_box[class_idx];
if (!st->enabled) {
return;
}
@ -200,9 +214,11 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
superslab_ref_inc(ss);
#if !HAKMEM_BUILD_RELEASE
// Debug: Track Page Box stats per-class
fprintf(stderr, "[PAGE_BOX_REG] class=%d num_pages=%u capacity=%u carved=%u\n",
class_idx, st->num_pages, meta->capacity, meta->carved);
// Debug: Track Page Box stats per-classENV: HAKMEM_TINY_PAGEBOX_LOG=0 で抑制)
if (tiny_page_box_log_enabled()) {
fprintf(stderr, "[PAGE_BOX_REG] class=%d num_pages=%u capacity=%u carved=%u\n",
class_idx, st->num_pages, meta->capacity, meta->carved);
}
#endif
}
@ -219,9 +235,11 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)
// - Superslab/Shared Pool 呼び出し頻度を徐々に観測・調整できる。
static inline int tiny_page_box_refill(int class_idx,
TinyTLSSlab* tls,
void** out,
int max_out)
{
(void)tls; // reserved for future per-TLS hints
if (!tiny_page_box_is_enabled(class_idx)) {
return 0;
}
@ -233,7 +251,7 @@ static inline int tiny_page_box_refill(int class_idx,
return 0;
}
TinyPageBoxState* st = &g_tiny_page_box_state[class_idx];
TinyPageBoxContext* st = &g_tiny_page_box[class_idx];
if (st->num_pages == 0) {
return 0;
}

View File

@ -4,39 +4,78 @@
#include "tiny_class_policy_box.h"
#include "tiny_class_stats_box.h"
#include <stdint.h>
#include <stdio.h>
// Simple OBSERVE/LEARN rule:
// - Choose top-2 classes by shared_pool_lock and enable Page Box for them.
// - Always keep existing warm_enabled / warm_cap (policy table is already seeded).
// Simple OBSERVE/LEARN rule (auto profile only):
// - C7 は常に ON (page + warm, cap=8)
// - それ以外のクラスから score = shared_lock*4 + uc_miss の上位2つだけ page/warm を ON
// - warm_cap は C5C7:8, それ以外:4
// - スコアが 0 なら何も変更しない
void tiny_policy_learner_tick(void) {
if (!tiny_class_policy_is_auto()) {
return;
}
TinyClassStatsThread snap = {0};
tiny_class_stats_snapshot_global(&snap);
// 事前に全クラスを OFF ベースに初期化cap はデフォルト値に)
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
TinyClassPolicy* p = &g_tiny_class_policy[c];
p->page_box_enabled = 0;
p->warm_enabled = 0;
p->warm_cap = (c >= 5) ? 8 : 4;
p->tls_carve_enabled = 0;
}
// C7 は常に ON
g_tiny_class_policy[7].page_box_enabled = 1;
g_tiny_class_policy[7].warm_enabled = 1;
g_tiny_class_policy[7].warm_cap = 8;
g_tiny_class_policy[7].tls_carve_enabled = 1;
// C7 を除く上位2クラスをスコアで選択
int top1 = -1, top2 = -1;
uint64_t v1 = 0, v2 = 0;
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
uint64_t v = snap.shared_lock[i];
if (v > v1) {
if (i == 7) continue;
uint64_t score = snap.shared_lock[i] * 4 + snap.uc_miss[i];
if (score > v1) {
top2 = top1;
v2 = v1;
top1 = i;
v1 = v;
} else if (v > v2) {
v1 = score;
} else if (score > v2) {
top2 = i;
v2 = v;
v2 = score;
}
}
// Nothing observed yet → leave policy untouched
// スコアが全く無い場合は C7 だけ維持
if (v1 == 0) {
return;
}
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
TinyClassPolicy* p = &g_tiny_class_policy[c];
if (c == top1 || c == top2) {
p->page_box_enabled = 1;
p->warm_enabled = 1;
if (top1 >= 0) {
TinyClassPolicy* p = &g_tiny_class_policy[top1];
p->page_box_enabled = 1;
p->warm_enabled = 1;
p->tls_carve_enabled = 1;
}
if (top2 >= 0 && v2 > 0) {
TinyClassPolicy* p = &g_tiny_class_policy[top2];
p->page_box_enabled = 1;
p->warm_enabled = 1;
p->tls_carve_enabled = 1;
}
// 1-shot ログ(最多 4 回まで)
static _Atomic uint32_t auto_logs = 0;
if (tiny_policy_log_enabled()) {
uint32_t n = atomic_fetch_add_explicit(&auto_logs, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[POLICY_AUTO_UPDATE] profile=auto (top=%d/%d)\n", top1, top2);
tiny_class_policy_dump(NULL);
}
}
}

View File

@ -7,6 +7,7 @@
#include "../tiny_debug_api.h" // tiny_refill_failfast_level(), tiny_failfast_abort_ptr()
#include "c7_meta_used_counter_box.h" // C7 meta->used telemetry (Release/Debug共通)
#include "tiny_next_ptr_box.h"
#include "tiny_class_stats_box.h"
#include "../superslab/superslab_inline.h"
#include <stdatomic.h>
#include <signal.h>
@ -41,6 +42,8 @@ tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
if (meta->class_idx != (uint8_t)class_idx) return res;
if (tls->slab_idx < 0 || tls->slab_idx >= ss_slabs_capacity(tls->ss)) return res;
tiny_class_stats_on_tls_carve_attempt(class_idx);
// Freelist pop
if (meta->freelist) {
#if !HAKMEM_BUILD_RELEASE
@ -61,6 +64,7 @@ tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
meta->used++;
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
ss_active_add(tls->ss, 1);
tiny_class_stats_on_tls_carve_success(class_idx);
res.block = block;
res.path = TINY_TLS_CARVE_PATH_FREELIST;
return res;
@ -93,6 +97,7 @@ tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
meta->used++;
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
ss_active_add(tls->ss, 1);
tiny_class_stats_on_tls_carve_success(class_idx);
res.block = block;
res.path = TINY_TLS_CARVE_PATH_LINEAR;
return res;

View File

@ -9,6 +9,7 @@
#include <stdint.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
#include "../hakmem_tiny_superslab.h"
#include "../tiny_tls.h"
@ -18,8 +19,18 @@
extern _Atomic uintptr_t g_c7_stage3_magic_ss;
// ENV gate for warm-prefill logging (enabled when HAKMEM_TINY_WARM_LOG is set,
// non-empty, and does not start with '0'). The answer is computed once and
// cached in a function-local static so repeated calls are branch + load only.
static inline int warm_prefill_log_enabled(void) {
    static int cached = -1;
    if (__builtin_expect(cached < 0, 0)) {
        const char* env = getenv("HAKMEM_TINY_WARM_LOG");
        int on = 0;
        if (env != NULL && env[0] != '\0' && env[0] != '0') {
            on = 1;
        }
        cached = on;
    }
    return cached;
}
static inline void warm_prefill_log_c7_meta(const char* tag, TinyTLSSlab* tls) {
if (!tls || !tls->ss) return;
if (!warm_prefill_log_enabled()) return;
#if HAKMEM_BUILD_RELEASE
static _Atomic uint32_t rel_logs = 0;
uint32_t n = atomic_fetch_add_explicit(&rel_logs, 1, memory_order_relaxed);
@ -116,7 +127,7 @@ static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls, int warm
}
// C7 safety: prefer only pristine slabs (used=0 carved=0 freelist=NULL)
if (class_idx == 7) {
if (class_idx == 7 && warm_prefill_log_enabled()) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
if (meta->class_idx == 7 &&
(meta->used > 0 || meta->carved > 0 || meta->freelist != NULL)) {
@ -162,7 +173,7 @@ static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls, int warm
warm_pool_rel_c7_prefill_slab();
}
#else
if (class_idx == 7) {
if (class_idx == 7 && warm_prefill_log_enabled()) {
static __thread int dbg_c7_prefill_logs = 0;
if (dbg_c7_prefill_logs < 8) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];

View File

@ -23,31 +23,19 @@ extern __thread TinyWarmPoolStats g_warm_pool_stats[TINY_NUM_CLASSES];
// Record a warm pool hit.
// Called when warm_pool_pop() succeeds and carve produces blocks.
// The counter update is compiled in only under HAKMEM_DEBUG_COUNTERS;
// otherwise this reduces to a no-op so the hot path pays nothing.
static inline void warm_pool_record_hit(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_warm_pool_stats[class_idx].hits++;
#else
    (void)class_idx;
#endif
}
// Record a warm pool miss.
// Called when warm_pool_pop() returns NULL (pool empty).
// The counter update is compiled in only under HAKMEM_DEBUG_COUNTERS;
// otherwise this reduces to a no-op so the hot path pays nothing.
static inline void warm_pool_record_miss(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_warm_pool_stats[class_idx].misses++;
#else
    (void)class_idx;
#endif
}
// Record a warm pool prefill event.
// Called when the pool is empty and we do a secondary prefill.
// The counter update is compiled in only under HAKMEM_DEBUG_COUNTERS;
// otherwise this reduces to a no-op so the hot path pays nothing.
static inline void warm_pool_record_prefilled(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_warm_pool_stats[class_idx].prefilled++;
#else
    (void)class_idx;
#endif
}
#endif // HAK_WARM_POOL_STATS_BOX_H