hakmem/core/hakmem_tiny.c

#include "hakmem_tiny.h"
#include "hakmem_tiny_config.h"    // Centralized configuration
#include "hakmem_phase7_config.h"  // Phase 7: Task 3 constants (PREWARM_COUNT, etc.)
#include "hakmem_tiny_superslab.h"
#include "box/ss_slab_meta_box.h"  // Phase 3d-A: SlabMeta Box boundary  // Phase 6.22: SuperSlab allocator
#include "hakmem_super_registry.h"  // Phase 8.2: SuperSlab registry for memory profiling
#include "hakmem_internal.h"
#include "hakmem_syscall.h"  // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
#include "hakmem_tiny_magazine.h"
#include "hakmem_tiny_integrity.h"  // PRIORITY 1-4: Corruption detection
#include "box/tiny_next_ptr_box.h"  // Box API: next pointer read/write
#include "box/ptr_conversion_box.h" // Box API: pointer conversion
#include "hakmem_env_cache.h"      // Priority-2: ENV cache
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
#include "hakmem_tiny_batch_refill.h"  // Phase 1: Batch refill/spill for mini-magazine
#include "hakmem_tiny_stats.h"     // Phase 1: Batched statistics (replaces XOR RNG)
// Phase 2B modules
#include "tiny_api.h"  // Consolidated: stats_api, query_api, rss_api, registry_api
#include "tiny_tls.h"
#include "tiny_debug.h"
#include "hakmem_debug_master.h"  // For unified debug level control
#include "tiny_mmap_gate.h"
#include "tiny_debug_ring.h"
#include "tiny_route.h"
#include "front/tiny_heap_v2.h"
#include "box/tiny_front_stats_box.h"
#include "tiny_tls_guard.h"
#include "tiny_ready.h"
#include "box/c7_meta_used_counter_box.h"
#include "box/tiny_c7_hotbox.h"
#include "box/tiny_heap_box.h"
#include "box/tiny_hotheap_v2_box.h"
#include "box/tiny_route_env_box.h"
#include "box/super_reg_box.h"
#include "hakmem_tiny_tls_list.h"
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
#include "hakmem_tiny_bg_spill.h"      // Phase 2C-2: Background spill queue
#include "tiny_adaptive_sizing.h"      // Phase 2b: Adaptive TLS cache sizing
// NOTE: hakmem_tiny_tls_ops.h included later (after type definitions)
#include "tiny_system.h"  // Consolidated: stdio, stdlib, string, etc.
#include "hakmem_prof.h"
#include "hakmem_trace.h"   // Optional USDT (perf) tracepoints

extern uint64_t g_bytes_allocated;  // from hakmem_tiny_superslab.c

// Tiny allocator configuration, debug counters, and return helpers
#include "hakmem_tiny_config_box.inc"

// ============================================================================ 
// Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h)
// ============================================================================ 
__thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
__thread tiny_heap_ctx_t g_tiny_heap_ctx;
__thread int g_tiny_heap_ctx_init = 0;
__thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2 = NULL;
TinyHeapClassStats g_tiny_heap_stats[TINY_NUM_CLASSES] = {0};
TinyC7PageStats g_c7_page_stats = {0};
tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0};
int g_tiny_route_snapshot_done = 0;
_Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0};

static int tiny_heap_stats_dump_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* eh = getenv("HAKMEM_TINY_HEAP_STATS_DUMP");
        const char* e = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP");
        g = ((eh && *eh && *eh != '0') || (e && *e && *e != '0')) ? 1 : 0;
    }
    return g;
}

__attribute__((destructor))
static void tiny_heap_stats_dump(void) {
    if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
        return;
    }
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapClassStats snap = {
            .alloc_fast_current = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fast_current, memory_order_relaxed),
            .alloc_slow_prepare = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_slow_prepare, memory_order_relaxed),
            .free_fast_local = atomic_load_explicit(&g_tiny_heap_stats[cls].free_fast_local, memory_order_relaxed),
            .free_slow_fallback = atomic_load_explicit(&g_tiny_heap_stats[cls].free_slow_fallback, memory_order_relaxed),
            .alloc_prepare_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_prepare_fail, memory_order_relaxed),
            .alloc_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fail, memory_order_relaxed),
        };
        if (snap.alloc_fast_current == 0 && snap.alloc_slow_prepare == 0 &&
            snap.free_fast_local == 0 && snap.free_slow_fallback == 0 &&
            snap.alloc_prepare_fail == 0 && snap.alloc_fail == 0) {
            continue;
        }
        fprintf(stderr,
                "[HEAP_STATS cls=%d] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n",
                cls,
                (unsigned long long)snap.alloc_fast_current,
                (unsigned long long)snap.alloc_slow_prepare,
                (unsigned long long)snap.free_fast_local,
                (unsigned long long)snap.free_slow_fallback,
                (unsigned long long)snap.alloc_prepare_fail,
                (unsigned long long)snap.alloc_fail);
    }
    TinyC7PageStats ps = {
        .prepare_calls = atomic_load_explicit(&g_c7_page_stats.prepare_calls, memory_order_relaxed),
        .prepare_with_current_null = atomic_load_explicit(&g_c7_page_stats.prepare_with_current_null, memory_order_relaxed),
        .prepare_from_partial = atomic_load_explicit(&g_c7_page_stats.prepare_from_partial, memory_order_relaxed),
        .current_set_from_free = atomic_load_explicit(&g_c7_page_stats.current_set_from_free, memory_order_relaxed),
        .current_dropped_to_partial = atomic_load_explicit(&g_c7_page_stats.current_dropped_to_partial, memory_order_relaxed),
    };
    if (ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
        ps.current_set_from_free || ps.current_dropped_to_partial) {
        fprintf(stderr,
                "[C7_PAGE_STATS] prepare_calls=%llu prepare_with_current_null=%llu prepare_from_partial=%llu current_set_from_free=%llu current_dropped_to_partial=%llu\n",
                (unsigned long long)ps.prepare_calls,
                (unsigned long long)ps.prepare_with_current_null,
                (unsigned long long)ps.prepare_from_partial,
                (unsigned long long)ps.current_set_from_free,
                (unsigned long long)ps.current_dropped_to_partial);
        fflush(stderr);
    }
}

__attribute__((destructor))
static void tiny_front_class_stats_dump(void) {
    if (!tiny_front_class_stats_dump_enabled()) {
        return;
    }
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t a = atomic_load_explicit(&g_tiny_front_alloc_class[cls], memory_order_relaxed);
        uint64_t f = atomic_load_explicit(&g_tiny_front_free_class[cls], memory_order_relaxed);
        if (a == 0 && f == 0) {
            continue;
        }
        fprintf(stderr, "[FRONT_CLASS cls=%d] alloc=%llu free=%llu\n",
                cls, (unsigned long long)a, (unsigned long long)f);
    }
}

__attribute__((destructor))
static void tiny_c7_delta_debug_destructor(void) {
    if (tiny_c7_meta_light_enabled() && tiny_c7_delta_debug_enabled()) {
        tiny_c7_heap_debug_dump_deltas();
    }
    if (tiny_heap_meta_light_enabled_for_class(6) && tiny_c6_delta_debug_enabled()) {
        tiny_c6_heap_debug_dump_deltas();
    }
}

// =============================================================================
// TinyHotHeap v2 (Phase30/31 wiring). Currently C7-only thin wrapper.
// =============================================================================
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_fallback = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_fallback = 0;
tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
    tiny_hotheap_ctx_v2* ctx = g_tiny_hotheap_ctx_v2;
    if (__builtin_expect(ctx == NULL, 0)) {
        ctx = (tiny_hotheap_ctx_v2*)calloc(1, sizeof(tiny_hotheap_ctx_v2));
        if (__builtin_expect(ctx == NULL, 0)) {
            fprintf(stderr, "[TinyHotHeapV2] TLS alloc failed (OOM)\n");
            abort();
        }
        g_tiny_hotheap_ctx_v2 = ctx;
        // C7 用 stride を最初にだけ設定（他クラスは未使用のまま）
        ctx->cls[7].stride = (uint16_t)tiny_stride_for_class(7);
    }
    return ctx;
}

void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc, 1, memory_order_relaxed);
    if (__builtin_expect(class_idx != 7, 0)) {
        return NULL;  // いまは C7 専用
    }

    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = &v2ctx->cls[7];
    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
    if (!v2page && vhcls) {
        v2page = &vhcls->storage_page;
        tiny_hotheap_v2_page_reset(v2page);
        vhcls->current_page = v2page;
    }

    tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
    tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(7);

    // Hot path: current_page の lease をそのまま使う
    if (v2page && v2page->lease_page && v1hcls) {
        tiny_heap_page_t* ipage = v2page->lease_page;
        if (ipage->free_list || ipage->used < ipage->capacity) {
            void* user = tiny_heap_page_pop(v1hcls, 7, ipage);
            if (user) {
                if (ipage->used >= ipage->capacity && ipage->free_list == NULL) {
                    tiny_heap_page_mark_full(v1hcls, ipage);
                }
                if (__builtin_expect(stats != NULL, 0)) {
                    atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
                }
                v2page->freelist = ipage->free_list;
                v2page->used = ipage->used;
                return user;
            }
        }
    }

    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
    }

    // Lease a page from v1 (C7 SAFE) and wrap it
    TinyHeapPageLease lease = tiny_heap_c7_lease_page_for_v2();
    if (!lease.page || !vhcls || !v1hcls) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback, 1, memory_order_relaxed);
        size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
        return tiny_c7_alloc_fast(size);  // safety fallback to v1
    }

    if (!v2page) {
        v2page = &vhcls->storage_page;
        tiny_hotheap_v2_page_reset(v2page);
        vhcls->current_page = v2page;
    }

    v2page->lease_page = lease.page;
    v2page->meta = lease.meta;
    v2page->ss = lease.ss;
    v2page->base = lease.base;
    v2page->capacity = lease.capacity;
    v2page->slab_idx = lease.slab_idx;
    v2page->freelist = lease.freelist;
    v2page->used = lease.page->used;

    if (lease.page->free_list || lease.page->used < lease.page->capacity) {
        void* user = tiny_heap_page_pop(v1hcls, 7, lease.page);
        if (user) {
            if (lease.page->used >= lease.page->capacity && lease.page->free_list == NULL) {
                tiny_heap_page_mark_full(v1hcls, lease.page);
            }
            if (__builtin_expect(stats != NULL, 0)) {
                atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
            }
            v2page->freelist = lease.page->free_list;
            v2page->used = lease.page->used;
            return user;
        }
    }

    // Lease 取得後でも pop できなければ v1 に委譲
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback, 1, memory_order_relaxed);
    size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
    return tiny_c7_alloc_fast(size);
}

void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
    if (__builtin_expect(class_idx != 7, 0)) {
        return;
    }
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free, 1, memory_order_relaxed);
    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[7] : NULL;
    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
    TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;

    tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
    tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);

    if (v2page && v2page->lease_page && meta_ptr && v1hcls &&
        v2page->meta == meta_ptr && tiny_heap_ptr_in_page_range(v2page->lease_page, p)) {
        tiny_heap_page_free_local(v1ctx, 7, v2page->lease_page, p);
        v2page->freelist = v2page->lease_page->free_list;
        v2page->used = v2page->lease_page->used;
        vhcls->current_page = v2page;  // keep pinned
        return;
    }

    // Fallback: mimic v1 free path
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fallback, 1, memory_order_relaxed);
    SuperSlab* ss = hak_super_lookup(p);
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int slab_idx = slab_index_for(ss, p);
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
            tiny_c7_free_fast_with_meta(ss, slab_idx, p);
            return;
        }
    }
    tiny_c7_free_fast(p);
}

#if !HAKMEM_BUILD_RELEASE
// Helper to dump last push from core/hakmem.c (SEGV handler)
// Must be visible to other TUs (extern in hakmem_tiny.h or similar if needed,
// but SEGV handler is in core/hakmem.c which can dlsym or weak link it)
__attribute__((noinline))
void tiny_debug_dump_last_push(int cls) {
    hak_base_ptr_t p = s_tls_sll_last_push[cls];
    void* raw = HAK_BASE_TO_RAW(p);
    fprintf(stderr, "[DEBUG] s_tls_sll_last_push[%d] = %p\n", cls, raw);
    if (raw && (uintptr_t)raw > 4096) {
        unsigned long* vals = (unsigned long*)raw;
        fprintf(stderr, "[DEBUG] Memory at %p: %016lx %016lx\n", raw, vals[0], vals[1]);
    }
}
#endif
// Forward declarations for static helpers used before definition
struct TinySlab; // forward
static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
static void release_slab(struct TinySlab* slab);
static TinySlab* allocate_new_slab(int class_idx);
static void tiny_tls_cache_drain(int class_idx);
static void tiny_apply_mem_diet(void);

// Phase 6.23: SuperSlab allocation forward declaration
static inline void* hak_tiny_alloc_superslab(int class_idx);
static inline void* superslab_tls_bump_fast(int class_idx);
SuperSlab* superslab_refill(int class_idx);
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
// Forward decl: used by tiny_spec_pop_path before its definition
#if HAKMEM_TINY_P0_BATCH_REFILL
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
#else
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
// Only a single implementation exists there; declare here for callers.
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
int sll_refill_small_from_ss(int class_idx, int max_take);
#else
static inline int sll_refill_small_from_ss(int class_idx, int max_take);
#endif
#endif
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
static void* __attribute__((cold, noinline)) tiny_slow_alloc_fast(int class_idx);
static inline void tiny_remote_drain_owner(struct TinySlab* slab);
static void tiny_remote_drain_locked(struct TinySlab* slab);
// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
/* moved below TinyTLSSlab definition */

// Step 3d: Forced inlining for readability + performance (306M target)
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx);
// Helpers for SuperSlab active block accounting (atomic, saturating dec)

// SuperSlab Active Counter Helpers - EXTRACTED to hakmem_tiny_ss_active_box.inc
#include "hakmem_tiny_ss_active_box.inc"

// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)

// Front refill count global config (declare before init.inc uses them)
extern int g_refill_count_global;
extern int g_refill_count_hot;
extern int g_refill_count_mid;
extern int g_refill_count_class[TINY_NUM_CLASSES];

// Step 3d: Forced inlining for slow path (maintain monolithic performance)
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#else
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#endif

// --------------------------------------------------------------------------- 
// Box: adopt_gate_try (implementation moved from header for robust linkage)
// --------------------------------------------------------------------------- 
#include "box/adopt_gate_box.h"
#include "box/super_reg_box.h"
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
extern unsigned long long g_adopt_gate_calls[];
extern unsigned long long g_adopt_gate_success[];
extern unsigned long long g_reg_scan_attempts[];
extern unsigned long long g_reg_scan_hits[];
SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
    g_adopt_gate_calls[class_idx]++;
    ROUTE_MARK(13);
    SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
    if (ss) { g_adopt_gate_success[class_idx]++; return ss; }
    g_reg_scan_attempts[class_idx]++;
    int reg_size = g_super_reg_class_size[class_idx];
    int reg_cap = super_reg_effective_per_class();
    if (reg_cap > 0 && reg_size > reg_cap) {
        reg_size = reg_cap;
    }
    int scan_limit = tiny_reg_scan_max();
    if (scan_limit > reg_size) scan_limit = reg_size;
    uint32_t self_tid = tiny_self_u32();
    // Local helper (mirror adopt_bind_if_safe) to avoid including alloc inline here
    auto int adopt_bind_if_safe_local(TinyTLSSlab* tls_l, SuperSlab* ss, int slab_idx, int class_idx_l) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (!slab_is_valid(&h)) return 0;
        slab_drain_remote_full(&h);
        if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
            tiny_tls_bind_slab(tls_l, h.ss, h.slab_idx);
            slab_release(&h);
            return 1;
        }
        slab_release(&h);
        return 0;
    }

    for (int i = 0; i < scan_limit; i++) {
        SuperSlab* cand = super_reg_by_class_at(class_idx, i);
        if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;
        // Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
        uint32_t mask = cand->nonempty_mask;
        // Fallback to atomic freelist_mask for cross-thread visibility
        if (mask == 0) {
            mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
        }
        if (mask == 0) continue;  // No visible freelists in this SS
        int cap = ss_slabs_capacity(cand);
        while (mask) {
            int sidx = __builtin_ctz(mask);
            mask &= (mask - 1);
            if (sidx >= cap) continue;
            if (adopt_bind_if_safe_local(tls, cand, sidx, class_idx)) {
                g_adopt_gate_success[class_idx]++;
                g_reg_scan_hits[class_idx]++;
                ROUTE_MARK(14); ROUTE_COMMIT(class_idx, 0x07);
                return cand;
            }
        }
    }
    return NULL;
}


// ============================================================================ 
// Global State - EXTRACTED to hakmem_tiny_globals_box.inc
// ============================================================================ 
#include "hakmem_tiny_globals_box.inc"

#include "hakmem_tiny_publish_box.inc"

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================ 
// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)
// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
static inline hak_base_ptr_t tiny_fast_pop(int class_idx);
static inline int tiny_fast_push(int class_idx, hak_base_ptr_t ptr);
static inline hak_base_ptr_t fastcache_pop(int class_idx);
static inline int fastcache_push(int class_idx, hak_base_ptr_t ptr);

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
// ============================================================================ 
// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
// 88 lines (lines 407-494)


// ============================================================================ 
// Legacy Slow Allocation Path - ARCHIVED
// ============================================================================ 
// Note: tiny_slow_alloc_fast() and related legacy slow path implementation
// have been moved to archive/hakmem_tiny_legacy_slow_box.inc and are no
// longer compiled. The current slow path uses Box化された hak_tiny_alloc_slow().


// ============================================================================ 
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================ 
// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)

// ============================================================================ 
// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
// ============================================================================ 
#include "hakmem_tiny_tls_state_box.inc"

#include "hakmem_tiny_intel.inc"

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
// ============================================================================ 
// EXTRACTED: static int get_rss_kb_self(void) {
// EXTRACTED:     FILE* f = fopen("/proc/self/status", "r");
// EXTRACTED:     if (!f) return 0;
// EXTRACTED:     char buf[256];
// EXTRACTED:     int kb = 0;
// EXTRACTED:     while (fgets(buf, sizeof(buf), f)) {
// EXTRACTED:         if (strncmp(buf, "VmRSS:", 6) == 0) {
// EXTRACTED:             char* p = buf;
// EXTRACTED:             while (*p && (*p < '0' || *p > '9')) {
// EXTRACTED:                 p++;
// EXTRACTED:             }
// EXTRACTED:             kb = atoi(p);
// EXTRACTED:             break;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:     fclose(f);
// EXTRACTED:     return kb;
// EXTRACTED: }

// Miss時にマガジンへ大量リフィルせず、1個だけ確保して即返すオプション
// Env: HAKMEM_TINY_REFILL_ONE_ON_MISS=1 で有効（デフォルト: 0）
int g_refill_one_on_miss = 0;

// Frontend fill target per class (adaptive)
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];

// Adaptive CAS: Active thread counter (for single-threaded optimization)
// Incremented on thread init, decremented on thread shutdown
_Atomic uint32_t g_hakmem_active_threads = 0;

// Per-thread registration flag (TLS variable)
static __thread int g_thread_registered = 0;

// Adaptive CAS: Register current thread (called on first allocation)
// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
__attribute__((always_inline))
inline void hakmem_thread_register(void) {
    if (__builtin_expect(g_thread_registered == 0, 0)) {
        g_thread_registered = 1;
        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
    }
}

// SLL capacity override array (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};

// Helper function for batch size (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_batch_for_class(int class_idx) {
    int ov = g_ultra_batch_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 64;            // 8B
        case 1: return 96;            // 16B
        case 2: return 96;            // 32B
        case 3: return 224;           // 64B
        case 4: return 96;            // 128B
        case 5: return 64;            // 256B
        case 6: return 64;            // 512B
        default: return 32;           // 1024B and others
    }
}

// Helper function for SLL capacity (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_sll_cap_for_class(int class_idx) {
    int ov = g_ultra_sll_cap_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 256;   // 8B
        case 1: return 384;   // 16B
        case 2: return 384;   // 32B
        case 3: return 768;   // 64B
        case 4: return 256;   // 128B
        default: return 128;  // others
    }
}

enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };


// Event Queue & Telemetry Helpers - EXTRACTED to hakmem_tiny_eventq_box.inc
#include "hakmem_tiny_eventq_box.inc"


// Background refill workers and intelligence engine
#include "hakmem_tiny_background.inc"

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================ 
// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)

// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
    if (!g_use_superslab) return NULL;
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    if (!meta) return NULL;
    // Try linear (bump) allocation first when freelist is empty
    if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        // Use per-slab class_idx to get stride
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tls->slab_base + ((size_t)meta->used * block_size);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        // Track active blocks in SuperSlab for conservative reclamation
        ss_active_inc(tls->ss);
        return block;
    }
    // Do not pop freelist here (keep magazine/SLL handling consistent)
    return NULL;
}

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================ 
// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================ 
// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)

// Phase 2C-3: TLS operations module (included after helper function definitions)
#include "hakmem_tiny_tls_ops.h"

// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
// ============================================================================ 
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================ 
// Function: tls_refill_from_tls_slab() - 101 lines
// Hot path refill operation, moved to inline function in header

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================ 
// Function: tls_list_spill_excess() - 97 lines
// Hot path spill operation, moved to inline function in header

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================ 
// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================ 
// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)


// SLL capacity policy: for hot tiny classes (0..3), allow larger SLL up to multiplier * mag_cap
// for >=4 keep current conservative half (to limit footprint).

// SLL Capacity Policy - EXTRACTED to hakmem_tiny_sll_cap_box.inc
#include "hakmem_tiny_sll_cap_box.inc"


// ============================================================================ 
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================ 
// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)

// Ultra-Mode Batch Configuration - REMOVED (dead code cleanup 2025-11-27)

#include "hakmem_tiny_remote.inc"

// ============================================================================ 
// Internal Helpers
// ============================================================================ 

// Step 2: Slab Registry Operations

// Hash function for slab_base (64KB aligned)
// ============================================================================ 
// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
// ============================================================================ 
// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
// EXTRACTED:     return (slab_base >> 16) & SLAB_REGISTRY_MASK;
// EXTRACTED: }

// Register slab in hash table (returns 1 on success, 0 on failure)
// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:     int hash = registry_hash(slab_base);
// EXTRACTED: 
// EXTRACTED:     // Linear probing (max 8 attempts)
// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED: 
// EXTRACTED:         if (entry->slab_base == 0) {
// EXTRACTED:             // Empty slot found
// EXTRACTED:             entry->slab_base = slab_base;
// EXTRACTED:             atomic_store_explicit(&entry->owner, owner, memory_order_release);
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return 1;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED: 
// EXTRACTED:     // Registry full (collision limit exceeded)
// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:     return 0;
// EXTRACTED: }

// Unregister slab from hash table
// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:     int hash = registry_hash(slab_base);
// EXTRACTED: 
// EXTRACTED:     // Linear probing search
// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED: 
// EXTRACTED:         if (entry->slab_base == slab_base) {
// EXTRACTED:             // Found - clear entry (atomic store prevents TOCTOU race)
// EXTRACTED:             atomic_store_explicit(&entry->owner, NULL, memory_order_release);
// EXTRACTED:             entry->slab_base = 0;
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return;
// EXTRACTED:         }
// EXTRACTED: 
// EXTRACTED:         if (entry->slab_base == 0) {
// EXTRACTED:             // Empty slot - not found
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: }

// Lookup slab by base address (O(1) average)

// ============================================================================ 
// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
// ============================================================================ 
#include "hakmem_tiny_slab_lookup_box.inc"


// Function: move_to_full_list() - 20 lines (lines 1104-1123)
// Move slab to full list

// Function: move_to_free_list() - 20 lines (lines 1126-1145)
// Move slab to free list

// ============================================================================ 
// Public API
// ============================================================================ 

// ============================================================================ 
// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
// ============================================================================ 
#include "hakmem_tiny_init.inc"

// ============================================================================ 
// 3-Layer Architecture (2025-11-01 Simplification)
// ============================================================================ 
// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
#include "hakmem_tiny_bump.inc.h"

// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
#include "hakmem_tiny_smallmag.inc.h"

// ============================================================================ 
// Phase 6 Fast Path Option (Metadata Header)
// ============================================================================ 
// Phase 6-1.6: Metadata Header (recommended)
//   - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
//   - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
//   - Memory: ~6-12% overhead (8 bytes/allocation)
//   - Method: Store pool_type + size_class in 8-byte header
//   - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
//   - Eliminates: Registry lookups, mid_lookup, owner checks
// ============================================================================ 

// Forward declarations for Phase 6 alloc/free functions


// ============================================================================ 
// Phase 6 Wrapper Functions - EXTRACTED to hakmem_tiny_phase6_wrappers_box.inc
// ============================================================================ 
#include "hakmem_tiny_phase6_wrappers_box.inc"


// Layer 1-3: Main allocation function (simplified)
// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#define HAKMEM_TINY_USE_NEW_3LAYER 0  // default OFF (legacy path)
#endif
#if HAKMEM_TINY_USE_NEW_3LAYER
#include "hakmem_tiny_alloc_new.inc"
#else
// Old 6-7 layer architecture (backup)
#include "hakmem_tiny_alloc.inc"
#endif

#include "hakmem_tiny_slow.inc"

// Free path implementations
#include "hakmem_tiny_free.inc"

// ---- Phase 1: Provide default batch-refill symbol (fallback to small refill)
// Allows runtime gate HAKMEM_TINY_REFILL_BATCH=1 without requiring a rebuild.
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
int sll_refill_small_from_ss(int class_idx, int max_take);
__attribute__((weak)) int sll_refill_batch_from_ss(int class_idx, int max_take)
{
    return sll_refill_small_from_ss(class_idx, max_take);
}
#endif

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================ 
// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
// Public trim and cleanup operation for lifecycle management

// Forward decl for internal registry lookup used by ultra safety validation
static TinySlab* registry_lookup(uintptr_t slab_base);

// ultra_sll_cap_for_class moved earlier in file (before hakmem_tiny_free.inc)

static inline int ultra_validate_sll_head(int class_idx, void* head) {
    uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
    TinySlab* owner = registry_lookup(base);
    if (!owner) return 0;
    uintptr_t start = (uintptr_t)owner->base;
    if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
    return (owner->class_idx == class_idx);
}
// Optional: wrapper TLS guard（ラッパー再入検知をTLSカウンタで）
#ifndef HAKMEM_WRAPPER_TLS_GUARD
#define HAKMEM_WRAPPER_TLS_GUARD 0
#endif
#if HAKMEM_WRAPPER_TLS_GUARD
extern __thread int g_tls_in_wrapper;
#endif

// ============================================================================ 
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================ 
// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
// Static function for draining TLS caches
//
// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
// Static function for memory diet mode application
//
// Phase 2D-3: Lifecycle management functions (226 lines total)
#include "hakmem_tiny_lifecycle.inc"

// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
#include "hakmem_tiny_slab_mgmt.inc"

// Tiny Heap v2 stats dump (opt-in)
void tiny_heap_v2_print_stats(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_HEAP_V2_STATS()) return;

    fprintf(stderr, "\n[HeapV2] TLS magazine stats (per class, thread-local)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[cls];
        TinyHeapV2Stats* st = &g_tiny_heap_v2_stats[cls];
        fprintf(stderr,
                "C%d: top=%d alloc_calls=%llu mag_hits=%llu refill_calls=%llu refill_blocks=%llu backend_oom=%llu\n",
                cls,
                mag->top,
                (unsigned long long)st->alloc_calls,
                (unsigned long long)st->mag_hits,
                (unsigned long long)st->refill_calls,
                (unsigned long long)st->refill_blocks,
                (unsigned long long)st->backend_oom);
    }
}

static void tiny_heap_v2_stats_atexit(void) __attribute__((destructor));
static void tiny_heap_v2_stats_atexit(void) {
    tiny_heap_v2_print_stats();
}

// Size→class routing for >=1024B (env: HAKMEM_TINY_ALLOC_1024_METRIC)
_Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES] = {0};
static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor));
static void tiny_alloc_1024_diag_atexit(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return;
    fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed);
        if (v) {
            fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v);
        }
    }
    fprintf(stderr, "\n");
}

// TLS SLL pointer diagnostics (optional)
extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES];
static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor));
static void tiny_tls_sll_diag_atexit(void) {
#if !HAKMEM_BUILD_RELEASE
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_SLL_DIAG()) return;
    fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed);
        uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed);
        if (ih || ip) {
            fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n",
                    cls, (unsigned long long)ih, (unsigned long long)ip);
        }
    }
#endif
}


// ============================================================================
// Performance Measurement: TLS SLL Statistics Print Function
// ============================================================================
void tls_sll_print_measurements(void) {
    // Check if measurement is enabled
    static int g_measure = -1;
    if (g_measure == -1) {
        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
        g_measure = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!g_measure) {
        return;  // Measurement disabled
    }

    uint64_t pushes = atomic_load_explicit(&g_tls_sll_push_count_global, memory_order_relaxed);
    uint64_t pops = atomic_load_explicit(&g_tls_sll_pop_count_global, memory_order_relaxed);
    uint64_t pop_empty = atomic_load_explicit(&g_tls_sll_pop_empty_count_global, memory_order_relaxed);

    uint64_t total_pop_attempts = pops + pop_empty;
    if (total_pop_attempts == 0 && pushes == 0) {
        fprintf(stderr, "\n========================================\n");
        fprintf(stderr, "TLS SLL Statistics\n");
        fprintf(stderr, "========================================\n");
        fprintf(stderr, "No operations recorded\n");
        fprintf(stderr, "========================================\n\n");
        return;
    }

    double hit_rate = total_pop_attempts > 0 ? (100.0 * pops) / total_pop_attempts : 0.0;
    double empty_rate = total_pop_attempts > 0 ? (100.0 * pop_empty) / total_pop_attempts : 0.0;

    fprintf(stderr, "\n========================================\n");
    fprintf(stderr, "TLS SLL Statistics\n");
    fprintf(stderr, "========================================\n");
    fprintf(stderr, "Total Pushes:     %llu\n", (unsigned long long)pushes);
    fprintf(stderr, "Total Pops:       %llu\n", (unsigned long long)pops);
    fprintf(stderr, "Pop Empty Count:  %llu (%.1f%% of pops)\n",
            (unsigned long long)pop_empty, empty_rate);
    fprintf(stderr, "Hit Rate:         %.1f%%\n", hit_rate);
    fprintf(stderr, "========================================\n\n");
}

// ============================================================================
// ACE Learning Layer & Tiny Guard - EXTRACTED to hakmem_tiny_ace_guard_box.inc
// ============================================================================
#include "hakmem_tiny_ace_guard_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "hakmem_tiny.h"
 								#include "hakmem_tiny_config.h"    // Centralized configuration
-												Phase 7 Task 3: Pre-warm TLS cache (+180-280% improvement!)

MAJOR SUCCESS: HAKMEM now achieves 85-92% of System malloc on tiny
allocations (128-512B) and BEATS System at 146% on 1024B allocations!

Performance Results:
- Random Mixed 128B: 21M → 59M ops/s (+181%) 🚀
- Random Mixed 256B: 19M → 70M ops/s (+268%) 🚀
- Random Mixed 512B: 21M → 68M ops/s (+224%) 🚀
- Random Mixed 1024B: 21M → 65M ops/s (+210%, 146% of System!) 🏆
- Larson 1T: 2.68M ops/s (stable, no regression)

Implementation:
1. Task 3a: Remove profiling overhead in release builds
   - Wrapped RDTSC calls in #if !HAKMEM_BUILD_RELEASE
   - Compiler can eliminate profiling code completely
   - Effect: +2% (2.68M → 2.73M Larson)

2. Task 3b: Simplify refill logic
   - Use constants from hakmem_build_flags.h
   - TLS cache already optimal
   - Effect: No regression

3. Task 3c: Pre-warm TLS cache (GAME CHANGER!)
   - Pre-allocate 16 blocks per class at init
   - Eliminates cold-start penalty
   - Effect: +180-280% improvement 🚀

Root Cause:
The bottleneck was cold-start, not the hot path! First allocation in
each class triggered a SuperSlab refill (100+ cycles). Pre-warming
eliminated this penalty, revealing Phase 7's true potential.

Files Modified:
- core/hakmem_tiny.c: Pre-warm function implementation
- core/box/hak_core_init.inc.h: Pre-warm initialization call
- core/tiny_alloc_fast.inc.h: Profiling overhead removal
- core/hakmem_phase7_config.h: Task 3 constants (NEW)
- core/hakmem_build_flags.h: Phase 7 feature flags
- Makefile: PREWARM_TLS flag, phase7 targets
- CLAUDE.md: Phase 7 success summary
- PHASE7_TASK3_RESULTS.md: Comprehensive results report (NEW)

Build:
make HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1 phase7-bench

🎉 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-08 12:54:52 +09:00
+								#include "hakmem_phase7_config.h"  // Phase 7: Task 3 constants (PREWARM_COUNT, etc.)
-												Phase 3d-A: SlabMeta Box boundary - Encapsulate SuperSlab metadata access

ChatGPT-guided Box theory refactoring (Phase A: Boundary only).

Changes:
- Created ss_slab_meta_box.h with 15 inline accessor functions
  - HOT fields (8): freelist, used, capacity (fast path)
  - COLD fields (6): class_idx, carved, owner_tid_low (init/debug)
  - Legacy (1): ss_slab_meta_ptr() for atomic ops
- Migrated 14 direct slabs[] access sites across 6 files
  - hakmem_shared_pool.c (4 sites)
  - tiny_free_fast_v2.inc.h (1 site)
  - hakmem_tiny.c (3 sites)
  - external_guard_box.h (1 site)
  - hakmem_tiny_lifecycle.inc (1 site)
  - ss_allocation_box.c (4 sites)

Architecture:
- Zero overhead (static inline wrappers)
- Single point of change for future layout optimizations
- Enables Hot/Cold split (Phase C) without touching call sites
- A/B testing support via compile-time flags

Verification:
- Build: ✅ Success (no errors)
- Stability: ✅ All sizes pass (128B-1KB, 22-24M ops/s)
- Behavior: Unchanged (thin wrapper, no logic changes)

Next: Phase B (TLS Cache Merge, +12-18% expected)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-20 02:01:52 +09:00
+								#include "hakmem_tiny_superslab.h"
 								#include "box/ss_slab_meta_box.h"  // Phase 3d-A: SlabMeta Box boundary  // Phase 6.22: SuperSlab allocator
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "hakmem_super_registry.h"  // Phase 8.2: SuperSlab registry for memory profiling
 								#include "hakmem_internal.h"
 								#include "hakmem_syscall.h"  // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
 								#include "hakmem_tiny_magazine.h"
-												Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure

## Major Additions

### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
  * 4-level integrity checking (0-4, compile-time controlled)
  * Priority 1: TLS array bounds validation
  * Priority 2: Freelist pointer validation
  * Priority 3: TLS canary monitoring
  * Priority ALPHA: Slab metadata invariant checking (5 invariants)
  * Atomic statistics tracking (thread-safe)
  * Beautiful BOX_BOUNDARY design pattern

### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
  * Immediate slab 0 binding after expansion
  * TLS state snapshot and restoration
  * Design by Contract (pre/post-conditions, invariants)
  * Thread-safe with mutex protection

### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)

### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)

**Detection**: Box I successfully caught invalid pointer at exact crash point

### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths

## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path

## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)

## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns

## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location

## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-12 02:45:00 +09:00
+								#include "hakmem_tiny_integrity.h"  // PRIORITY 1-4: Corruption detection
-												Box API Phase 1-3: Capacity Manager, Carve-Push, Prewarm 実装

Priority 1-3のBox Modulesを実装し、安全なpre-warming APIを提供。
既存の複雑なprewarmコードを1行のBox API呼び出しに置き換え。

## 新規Box Modules

1. **Box Capacity Manager** (capacity_box.h/c)
   - TLS SLL容量の一元管理
   - adaptive_sizing初期化保証
   - Double-free バグ防止

2. **Box Carve-And-Push** (carve_push_box.h/c)
   - アトミックなblock carve + TLS SLL push
   - All-or-nothing semantics
   - Rollback保証（partial failure防止）

3. **Box Prewarm** (prewarm_box.h/c)
   - 安全なTLS cache pre-warming
   - 初期化依存性を隠蔽
   - シンプルなAPI (1関数呼び出し)

## コード簡略化

hakmem_tiny_init.inc: 20行 → 1行
```c
// BEFORE: 複雑なP0分岐とエラー処理
adaptive_sizing_init();
if (prewarm > 0) {
    #if HAKMEM_TINY_P0_BATCH_REFILL
        int taken = sll_refill_batch_from_ss(5, prewarm);
    #else
        int taken = sll_refill_small_from_ss(5, prewarm);
    #endif
}

// AFTER: Box API 1行
int taken = box_prewarm_tls(5, prewarm);
```

## シンボルExport修正

hakmem_tiny.c: 5つのシンボルをstatic → non-static
- g_tls_slabs[] (TLS slab配列)
- g_sll_multiplier (SLL容量乗数)
- g_sll_cap_override[] (容量オーバーライド)
- superslab_refill() (SuperSlab再充填)
- ss_active_add() (アクティブカウンタ)

## ビルドシステム

Makefile: TINY_BENCH_OBJS_BASEに3つのBox modules追加
- core/box/capacity_box.o
- core/box/carve_push_box.o
- core/box/prewarm_box.o

## 動作確認

✅ Debug build成功
✅ Box Prewarm API動作確認
   [PREWARM] class=5 requested=128 taken=32

## 次のステップ

- Box Refill Manager (Priority 4)
- Box SuperSlab Allocator (Priority 5)
- Release build修正（tiny_debug_ring_record）

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-13 01:45:30 +09:00
+								#include "box/tiny_next_ptr_box.h"  // Box API: next pointer read/write
-												Phase 1 Refactoring Complete: Box-based Logic Consolidation ✅

Summary:
- Task 1.1 ✅: Created tiny_layout_box.h for centralized class/header definitions
- Task 1.2 ✅: Updated tiny_nextptr.h to use layout Box (bitmasking optimization)
- Task 1.3 ✅: Enhanced ptr_conversion_box.h with Phantom Types support
- Task 1.4 ✅: Implemented test_phantom.c for Debug-mode type checking

Verification Results (by Task Agent):
- Box Pattern Compliance: ⭐⭐⭐⭐⭐ (5/5) - MISSION/DESIGN documented
- Type Safety: ⭐⭐⭐⭐⭐ (5/5) - Phantom Types working as designed
- Test Coverage: ⭐⭐⭐☆☆ (3/5) - Compile-time tests OK, runtime tests planned
- Performance: 0 bytes, 0 cycles overhead in Release build
- Build Status: ✅ Success (526KB libhakmem.so, zero warnings)

Key Achievements:
1. Single Source of Truth principle fully implemented
2. Circular dependency eliminated (layout→header→nextptr→conversion)
3. Release build: 100% inlining, zero overhead
4. Debug build: Full type checking with Phantom Types
5. HAK_RET_ALLOC macro migrated to Box API

Known Issues (unrelated to Phase 1):
- TLS_SLL_HDR_RESET from sh8bench (existing, will be resolved in Phase 2)

Next Steps:
- Phase 2 readiness: ✅ READY
- Recommended: Create migration guide + runtime test suite
- Alignment guarantee will be addressed in Phase 2 (Headerless layout)

🤖 Generated with Claude Code + Gemini (implementation) + Task Agent (verification)

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 11:38:11 +09:00
+								#include "box/ptr_conversion_box.h" // Box API: pointer conversion
-												Priority-2 ENV Cache: hakmem_tiny.c (3箇所置換)

【置換ファイル】
- core/hakmem_tiny.c (3箇所 → ENV Cache)

【変更詳細】
1. tiny_heap_v2_print_stats():
   - getenv("HAKMEM_TINY_HEAP_V2_STATS") → HAK_ENV_TINY_HEAP_V2_STATS()

2. tiny_alloc_1024_diag_atexit():
   - getenv("HAKMEM_TINY_ALLOC_1024_METRIC") → HAK_ENV_TINY_ALLOC_1024_METRIC()

3. tiny_tls_sll_diag_atexit():
   - getenv("HAKMEM_TINY_SLL_DIAG") → HAK_ENV_TINY_SLL_DIAG()

- #include "hakmem_env_cache.h" 追加

【効果】
- 診断系atexit()関数からgetenv()呼び出しを排除
- 既存ENV変数を利用 (新規追加なし、カウント: 46変数維持)

【テスト】
✅ make shared → 成功
✅ /tmp/test_mixed3_final → PASSED

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-02 20:54:03 +09:00
+								#include "hakmem_env_cache.h"      // Priority-2: ENV cache
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
 								#include "hakmem_tiny_batch_refill.h"  // Phase 1: Batch refill/spill for mini-magazine
 								#include "hakmem_tiny_stats.h"     // Phase 1: Batched statistics (replaces XOR RNG)
 								// Phase 2B modules
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								#include "tiny_api.h"  // Consolidated: stats_api, query_api, rss_api, registry_api
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "tiny_tls.h"
 								#include "tiny_debug.h"
-												Cleanup: Consolidate debug ENV vars to HAKMEM_DEBUG_LEVEL

Integrated 4 new debug environment variables added during bug fixes
into the existing unified HAKMEM_DEBUG_LEVEL system (expanded to 0-5 levels).

Changes:

1. Expanded HAKMEM_DEBUG_LEVEL from 0-3 to 0-5 levels:
   - 0 = OFF (production)
   - 1 = ERROR (critical errors)
   - 2 = WARN (warnings)
   - 3 = INFO (allocation paths, header validation, stats)
   - 4 = DEBUG (guard instrumentation, failfast)
   - 5 = TRACE (verbose tracing)

2. Integrated 4 environment variables:
   - HAKMEM_ALLOC_PATH_TRACE → HAKMEM_DEBUG_LEVEL >= 3 (INFO)
   - HAKMEM_TINY_SLL_VALIDATE_HDR → HAKMEM_DEBUG_LEVEL >= 3 (INFO)
   - HAKMEM_TINY_REFILL_FAILFAST → HAKMEM_DEBUG_LEVEL >= 4 (DEBUG)
   - HAKMEM_TINY_GUARD → HAKMEM_DEBUG_LEVEL >= 4 (DEBUG)

3. Kept 2 special-purpose variables (fine-grained control):
   - HAKMEM_TINY_GUARD_CLASS (target class for guard)
   - HAKMEM_TINY_GUARD_MAX (max guard events)

4. Backward compatibility:
   - Legacy ENV vars still work via hak_debug_check_level()
   - New code uses unified system
   - No behavior changes for existing users

Updated files:
- core/hakmem_debug_master.h (level 0-5 expansion)
- core/hakmem_tiny_superslab_internal.h (alloc path trace)
- core/box/tls_sll_box.h (header validation)
- core/tiny_failfast.c (failfast level)
- core/tiny_refill_opt.h (failfast guard)
- core/hakmem_tiny_ace_guard_box.inc (guard enable)
- core/hakmem_tiny.c (include hakmem_debug_master.h)

Impact:
- Simpler debug control: HAKMEM_DEBUG_LEVEL=3 instead of 4 separate ENVs
- Easier to discover/use
- Consistent debug levels across codebase
- Reduces ENV variable proliferation (43+ vars surveyed)

Future work:
- Consolidate remaining 39+ debug variables (documented in survey)
- Gradual migration over 2-3 releases

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-29 06:57:03 +09:00
+								#include "hakmem_debug_master.h"  // For unified debug level control
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "tiny_mmap_gate.h"
 								#include "tiny_debug_ring.h"
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								#include "tiny_route.h"
-												Code Cleanup: Remove false positives, redundant validations, and reduce verbose logging

Following the C7 stride upgrade fix (commit 23c0d9541), this commit performs
comprehensive cleanup to improve code quality and reduce debug noise.

## Changes

### 1. Disable False Positive Checks (tiny_nextptr.h)
- **Disabled**: NXT_MISALIGN validation block with `#if 0`
- **Reason**: Produces false positives due to slab base offsets (2048, 65536)
  not being stride-aligned, causing all blocks to appear "misaligned"
- **TODO**: Reimplement to check stride DISTANCE between consecutive blocks
  instead of absolute alignment to stride boundaries

### 2. Remove Redundant Geometry Validations

**hakmem_tiny_refill_p0.inc.h (P0 batch refill)**
- Removed 25-line CARVE_GEOMETRY_FIX validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Stride table is now correct in tiny_block_stride_for_class(),
  defense-in-depth validation adds overhead without benefit

**ss_legacy_backend_box.c (legacy backend)**
- Removed 18-line LEGACY_FIX_GEOMETRY validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Shared_pool validates geometry at acquisition time

### 3. Reduce Verbose Logging

**hakmem_shared_pool.c (sp_fix_geometry_if_needed)**
- Made SP_FIX_GEOMETRY logging conditional on `!HAKMEM_BUILD_RELEASE`
- **Reason**: Geometry fixes are expected during stride upgrades,
  no need to log in release builds

### 4. Verification
- Build: ✅ Successful (LTO warnings expected)
- Test: ✅ 10K iterations (1.87M ops/s, no crashes)
- NXT_MISALIGN false positives: ✅ Eliminated

## Files Modified
- core/tiny_nextptr.h - Disabled false positive NXT_MISALIGN check
- core/hakmem_tiny_refill_p0.inc.h - Removed redundant CARVE validation
- core/box/ss_legacy_backend_box.c - Removed redundant LEGACY validation
- core/hakmem_shared_pool.c - Made SP_FIX_GEOMETRY logging debug-only

## Impact
- **Code clarity**: Removed 43 lines of redundant validation code
- **Debug noise**: Reduced false positive diagnostics
- **Performance**: Eliminated overhead from redundant geometry checks
- **Maintainability**: Single source of truth for geometry validation

🧹 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 23:00:24 +09:00
+								#include "front/tiny_heap_v2.h"
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								#include "box/tiny_front_stats_box.h"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "tiny_tls_guard.h"
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								#include "tiny_ready.h"
-												Fix C7 warm/TLS Release path and unify debug instrumentation

											
										
										
											2025-12-05 23:41:01 +09:00
+								#include "box/c7_meta_used_counter_box.h"
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								#include "box/tiny_c7_hotbox.h"
 								#include "box/tiny_heap_box.h"
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								#include "box/tiny_hotheap_v2_box.h"
 								#include "box/tiny_route_env_box.h"
-												Boxify superslab registry, add bench profile, and document C7 hotpath experiments

											
										
										
											2025-12-07 03:12:27 +09:00
+								#include "box/super_reg_box.h"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "hakmem_tiny_tls_list.h"
 								#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
 								#include "hakmem_tiny_bg_spill.h"      // Phase 2C-2: Background spill queue
-												feat: Phase 7 + Phase 2 - Massive performance & stability improvements

Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓

Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
  Result: +180-280% improvement, 85-146% of System malloc

Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)

Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
  Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
  Result: 50% → 95% stability (19/20 4T success)

Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
  Files: core/tiny_adaptive_sizing.c/h (new)

Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
  Files: core/hakmem_bigcache.c/h
  Expected: +10-20% cache hit rate

Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)

Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis

Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files

Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-08 17:08:00 +09:00
+								#include "tiny_adaptive_sizing.h"      // Phase 2b: Adaptive TLS cache sizing
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// NOTE: hakmem_tiny_tls_ops.h included later (after type definitions)
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								#include "tiny_system.h"  // Consolidated: stdio, stdlib, string, etc.
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "hakmem_prof.h"
 								#include "hakmem_trace.h"   // Optional USDT (perf) tracepoints
 								extern uint64_t g_bytes_allocated;  // from hakmem_tiny_superslab.c
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								// Tiny allocator configuration, debug counters, and return helpers
 								#include "hakmem_tiny_config_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
 								// Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h)
 								// ============================================================================
 								__thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								__thread tiny_heap_ctx_t g_tiny_heap_ctx;
 								__thread int g_tiny_heap_ctx_init = 0;
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								__thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2 = NULL;
 								TinyHeapClassStats g_tiny_heap_stats[TINY_NUM_CLASSES] = {0};
 								TinyC7PageStats g_c7_page_stats = {0};
 								tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0};
 								int g_tiny_route_snapshot_done = 0;
 								_Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0};
 								_Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0};
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								static int tiny_heap_stats_dump_enabled(void) {
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								    static int g = -1;
 								    if (__builtin_expect(g == -1, 0)) {
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								        const char* eh = getenv("HAKMEM_TINY_HEAP_STATS_DUMP");
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								        const char* e = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP");
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								        g = ((eh && *eh && *eh != '0') || (e && *e && *e != '0')) ? 1 : 0;
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								    }
 								    return g;
 								}
 								__attribute__((destructor))
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								static void tiny_heap_stats_dump(void) {
 								    if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								        return;
 								    }
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
 								        TinyHeapClassStats snap = {
 								            .alloc_fast_current = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fast_current, memory_order_relaxed),
 								            .alloc_slow_prepare = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_slow_prepare, memory_order_relaxed),
 								            .free_fast_local = atomic_load_explicit(&g_tiny_heap_stats[cls].free_fast_local, memory_order_relaxed),
 								            .free_slow_fallback = atomic_load_explicit(&g_tiny_heap_stats[cls].free_slow_fallback, memory_order_relaxed),
 								            .alloc_prepare_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_prepare_fail, memory_order_relaxed),
 								            .alloc_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fail, memory_order_relaxed),
 								        };
 								        if (snap.alloc_fast_current == 0 && snap.alloc_slow_prepare == 0 &&
 								            snap.free_fast_local == 0 && snap.free_slow_fallback == 0 &&
 								            snap.alloc_prepare_fail == 0 && snap.alloc_fail == 0) {
 								            continue;
 								        }
 								        fprintf(stderr,
 								                "[HEAP_STATS cls=%d] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n",
 								                cls,
 								                (unsigned long long)snap.alloc_fast_current,
 								                (unsigned long long)snap.alloc_slow_prepare,
 								                (unsigned long long)snap.free_fast_local,
 								                (unsigned long long)snap.free_slow_fallback,
 								                (unsigned long long)snap.alloc_prepare_fail,
 								                (unsigned long long)snap.alloc_fail);
 								    }
 								    TinyC7PageStats ps = {
 								        .prepare_calls = atomic_load_explicit(&g_c7_page_stats.prepare_calls, memory_order_relaxed),
 								        .prepare_with_current_null = atomic_load_explicit(&g_c7_page_stats.prepare_with_current_null, memory_order_relaxed),
 								        .prepare_from_partial = atomic_load_explicit(&g_c7_page_stats.prepare_from_partial, memory_order_relaxed),
 								        .current_set_from_free = atomic_load_explicit(&g_c7_page_stats.current_set_from_free, memory_order_relaxed),
 								        .current_dropped_to_partial = atomic_load_explicit(&g_c7_page_stats.current_dropped_to_partial, memory_order_relaxed),
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								    };
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								    if (ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
 								        ps.current_set_from_free || ps.current_dropped_to_partial) {
 								        fprintf(stderr,
 								                "[C7_PAGE_STATS] prepare_calls=%llu prepare_with_current_null=%llu prepare_from_partial=%llu current_set_from_free=%llu current_dropped_to_partial=%llu\n",
 								                (unsigned long long)ps.prepare_calls,
 								                (unsigned long long)ps.prepare_with_current_null,
 								                (unsigned long long)ps.prepare_from_partial,
 								                (unsigned long long)ps.current_set_from_free,
 								                (unsigned long long)ps.current_dropped_to_partial);
 								        fflush(stderr);
 								    }
 								}
 								__attribute__((destructor))
 								static void tiny_front_class_stats_dump(void) {
 								    if (!tiny_front_class_stats_dump_enabled()) {
 								        return;
 								    }
 								    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
 								        uint64_t a = atomic_load_explicit(&g_tiny_front_alloc_class[cls], memory_order_relaxed);
 								        uint64_t f = atomic_load_explicit(&g_tiny_front_free_class[cls], memory_order_relaxed);
 								        if (a == 0 && f == 0) {
 								            continue;
 								        }
 								        fprintf(stderr, "[FRONT_CLASS cls=%d] alloc=%llu free=%llu\n",
 								                cls, (unsigned long long)a, (unsigned long long)f);
 								    }
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								}
 								__attribute__((destructor))
 								static void tiny_c7_delta_debug_destructor(void) {
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								    if (tiny_c7_meta_light_enabled() && tiny_c7_delta_debug_enabled()) {
 								        tiny_c7_heap_debug_dump_deltas();
 								    }
 								    if (tiny_heap_meta_light_enabled_for_class(6) && tiny_c6_delta_debug_enabled()) {
 								        tiny_c6_heap_debug_dump_deltas();
 								    }
 								}
 								// =============================================================================
 								// TinyHotHeap v2 (Phase30/31 wiring). Currently C7-only thin wrapper.
 								// =============================================================================
 								static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc = 0;
 								static _Atomic uint64_t g_tiny_hotheap_v2_c7_free = 0;
 								static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_fallback = 0;
 								static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_fallback = 0;
 								tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
 								    tiny_hotheap_ctx_v2* ctx = g_tiny_hotheap_ctx_v2;
 								    if (__builtin_expect(ctx == NULL, 0)) {
 								        ctx = (tiny_hotheap_ctx_v2*)calloc(1, sizeof(tiny_hotheap_ctx_v2));
 								        if (__builtin_expect(ctx == NULL, 0)) {
 								            fprintf(stderr, "[TinyHotHeapV2] TLS alloc failed (OOM)\n");
 								            abort();
 								        }
 								        g_tiny_hotheap_ctx_v2 = ctx;
 								        // C7 用 stride を最初にだけ設定（他クラスは未使用のまま）
 								        ctx->cls[7].stride = (uint16_t)tiny_stride_for_class(7);
 								    }
 								    return ctx;
 								}
 								void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
 								    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc, 1, memory_order_relaxed);
 								    if (__builtin_expect(class_idx != 7, 0)) {
 								        return NULL;  // いまは C7 専用
 								    }
 								    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
 								    tiny_hotheap_class_v2* vhcls = &v2ctx->cls[7];
 								    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
 								    if (!v2page && vhcls) {
 								        v2page = &vhcls->storage_page;
 								        tiny_hotheap_v2_page_reset(v2page);
 								        vhcls->current_page = v2page;
 								    }
 								    tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
 								    tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
 								    TinyHeapClassStats* stats = tiny_heap_stats_for_class(7);
 								    // Hot path: current_page の lease をそのまま使う
 								    if (v2page && v2page->lease_page && v1hcls) {
 								        tiny_heap_page_t* ipage = v2page->lease_page;
 								        if (ipage->free_list || ipage->used < ipage->capacity) {
 								            void* user = tiny_heap_page_pop(v1hcls, 7, ipage);
 								            if (user) {
 								                if (ipage->used >= ipage->capacity && ipage->free_list == NULL) {
 								                    tiny_heap_page_mark_full(v1hcls, ipage);
 								                }
 								                if (__builtin_expect(stats != NULL, 0)) {
 								                    atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
 								                }
 								                v2page->freelist = ipage->free_list;
 								                v2page->used = ipage->used;
 								                return user;
 								            }
 								        }
 								    }
 								    if (__builtin_expect(stats != NULL, 0)) {
 								        atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
 								    }
 								    // Lease a page from v1 (C7 SAFE) and wrap it
 								    TinyHeapPageLease lease = tiny_heap_c7_lease_page_for_v2();
 								    if (!lease.page || !vhcls || !v1hcls) {
 								        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback, 1, memory_order_relaxed);
 								        size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
 								        return tiny_c7_alloc_fast(size);  // safety fallback to v1
 								    }
 								    if (!v2page) {
 								        v2page = &vhcls->storage_page;
 								        tiny_hotheap_v2_page_reset(v2page);
 								        vhcls->current_page = v2page;
 								    }
 								    v2page->lease_page = lease.page;
 								    v2page->meta = lease.meta;
 								    v2page->ss = lease.ss;
 								    v2page->base = lease.base;
 								    v2page->capacity = lease.capacity;
 								    v2page->slab_idx = lease.slab_idx;
 								    v2page->freelist = lease.freelist;
 								    v2page->used = lease.page->used;
 								    if (lease.page->free_list || lease.page->used < lease.page->capacity) {
 								        void* user = tiny_heap_page_pop(v1hcls, 7, lease.page);
 								        if (user) {
 								            if (lease.page->used >= lease.page->capacity && lease.page->free_list == NULL) {
 								                tiny_heap_page_mark_full(v1hcls, lease.page);
 								            }
 								            if (__builtin_expect(stats != NULL, 0)) {
 								                atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
 								            }
 								            v2page->freelist = lease.page->free_list;
 								            v2page->used = lease.page->used;
 								            return user;
 								        }
 								    }
 								    // Lease 取得後でも pop できなければ v1 に委譲
 								    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback, 1, memory_order_relaxed);
 								    size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
 								    return tiny_c7_alloc_fast(size);
 								}
 								void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
 								    if (__builtin_expect(class_idx != 7, 0)) {
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								        return;
 								    }
-												C7 v2: add lease helpers and v2 page reset

											
										
										
											2025-12-08 14:40:03 +09:00
+								    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free, 1, memory_order_relaxed);
 								    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
 								    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[7] : NULL;
 								    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
 								    TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;
 								    tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
 								    tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
 								    if (v2page && v2page->lease_page && meta_ptr && v1hcls &&
 								        v2page->meta == meta_ptr && tiny_heap_ptr_in_page_range(v2page->lease_page, p)) {
 								        tiny_heap_page_free_local(v1ctx, 7, v2page->lease_page, p);
 								        v2page->freelist = v2page->lease_page->free_list;
 								        v2page->used = v2page->lease_page->used;
 								        vhcls->current_page = v2page;  // keep pinned
 								        return;
 								    }
 								    // Fallback: mimic v1 free path
 								    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fallback, 1, memory_order_relaxed);
 								    SuperSlab* ss = hak_super_lookup(p);
 								    if (ss && ss->magic == SUPERSLAB_MAGIC) {
 								        int slab_idx = slab_index_for(ss, p);
 								        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
 								            tiny_c7_free_fast_with_meta(ss, slab_idx, p);
 								            return;
 								        }
 								    }
 								    tiny_c7_free_fast(p);
-												Add TinyHeap class mask and extend routing

											
										
										
											2025-12-07 22:49:28 +09:00
+								}
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
 								#if !HAKMEM_BUILD_RELEASE
 								// Helper to dump last push from core/hakmem.c (SEGV handler)
 								// Must be visible to other TUs (extern in hakmem_tiny.h or similar if needed,
 								// but SEGV handler is in core/hakmem.c which can dlsym or weak link it)
 								__attribute__((noinline))
 								void tiny_debug_dump_last_push(int cls) {
 								    hak_base_ptr_t p = s_tls_sll_last_push[cls];
 								    void* raw = HAK_BASE_TO_RAW(p);
 								    fprintf(stderr, "[DEBUG] s_tls_sll_last_push[%d] = %p\n", cls, raw);
 								    if (raw && (uintptr_t)raw > 4096) {
 								        unsigned long* vals = (unsigned long*)raw;
 								        fprintf(stderr, "[DEBUG] Memory at %p: %016lx %016lx\n", raw, vals[0], vals[1]);
 								    }
 								}
 								#endif
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Forward declarations for static helpers used before definition
 								struct TinySlab; // forward
 								static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
 								static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
 								static void release_slab(struct TinySlab* slab);
 								static TinySlab* allocate_new_slab(int class_idx);
 								static void tiny_tls_cache_drain(int class_idx);
 								static void tiny_apply_mem_diet(void);
 								// Phase 6.23: SuperSlab allocation forward declaration
 								static inline void* hak_tiny_alloc_superslab(int class_idx);
 								static inline void* superslab_tls_bump_fast(int class_idx);
-												Box API Phase 1-3: Capacity Manager, Carve-Push, Prewarm 実装

Priority 1-3のBox Modulesを実装し、安全なpre-warming APIを提供。
既存の複雑なprewarmコードを1行のBox API呼び出しに置き換え。

## 新規Box Modules

1. **Box Capacity Manager** (capacity_box.h/c)
   - TLS SLL容量の一元管理
   - adaptive_sizing初期化保証
   - Double-free バグ防止

2. **Box Carve-And-Push** (carve_push_box.h/c)
   - アトミックなblock carve + TLS SLL push
   - All-or-nothing semantics
   - Rollback保証（partial failure防止）

3. **Box Prewarm** (prewarm_box.h/c)
   - 安全なTLS cache pre-warming
   - 初期化依存性を隠蔽
   - シンプルなAPI (1関数呼び出し)

## コード簡略化

hakmem_tiny_init.inc: 20行 → 1行
```c
// BEFORE: 複雑なP0分岐とエラー処理
adaptive_sizing_init();
if (prewarm > 0) {
    #if HAKMEM_TINY_P0_BATCH_REFILL
        int taken = sll_refill_batch_from_ss(5, prewarm);
    #else
        int taken = sll_refill_small_from_ss(5, prewarm);
    #endif
}

// AFTER: Box API 1行
int taken = box_prewarm_tls(5, prewarm);
```

## シンボルExport修正

hakmem_tiny.c: 5つのシンボルをstatic → non-static
- g_tls_slabs[] (TLS slab配列)
- g_sll_multiplier (SLL容量乗数)
- g_sll_cap_override[] (容量オーバーライド)
- superslab_refill() (SuperSlab再充填)
- ss_active_add() (アクティブカウンタ)

## ビルドシステム

Makefile: TINY_BENCH_OBJS_BASEに3つのBox modules追加
- core/box/capacity_box.o
- core/box/carve_push_box.o
- core/box/prewarm_box.o

## 動作確認

✅ Debug build成功
✅ Box Prewarm API動作確認
   [PREWARM] class=5 requested=128 taken=32

## 次のステップ

- Box Refill Manager (Priority 4)
- Box SuperSlab Allocator (Priority 5)
- Release build修正（tiny_debug_ring_record）

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-13 01:45:30 +09:00
+								SuperSlab* superslab_refill(int class_idx);
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
 								static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
 								// Forward decl: used by tiny_spec_pop_path before its definition
-												Tiny: Enable P0 batch refill by default + docs and task update

Summary
- Default P0 ON: Build-time HAKMEM_TINY_P0_BATCH_REFILL=1 remains; runtime gate now defaults to ON
  (HAKMEM_TINY_P0_ENABLE unset or not '0'). Kill switch preserved via HAKMEM_TINY_P0_DISABLE=1.
- Fix critical bug: After freelist→SLL batch splice, increment TinySlabMeta::used by 'from_freelist'
  to mirror non-P0 behavior (prevents under-accounting and follow-on carve invariants from breaking).
- Add low-overhead A/B toggles for triage: HAKMEM_TINY_P0_NO_DRAIN (skip remote drain),
  HAKMEM_TINY_P0_LOG (emit [P0_COUNTER_OK/MISMATCH] based on total_active_blocks delta).
- Keep linear carve fail-fast guards across simple/general/TLS-bump paths.

Perf (1T, 100k×256B)
- P0 OFF: ~2.73M ops/s (stable)
- P0 ON (no drain): ~2.45M ops/s
- P0 ON (normal drain): ~2.76M ops/s (fastest)

Known
- Rare [P0_COUNTER_MISMATCH] warnings persist (non-fatal). Continue auditing active/used
  balance around batch freelist splice and remote drain splice.

Docs
- Add docs/TINY_P0_BATCH_REFILL.md (runtime switches, behavior, perf notes).
- Update CURRENT_TASK.md with Tiny P0 status (default ON) and next steps.

											
										
										
											2025-11-09 22:12:34 +09:00
+								#if HAKMEM_TINY_P0_BATCH_REFILL
-												Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)

## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).

## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
   - Added class_idx to TinySlabMeta (per-slab dynamic class)
   - Removed size_class from SuperSlab (no longer per-SuperSlab)
   - Changed owner_tid (16-bit) → owner_tid_low (8-bit)

2. **Shared Pool** (hakmem_shared_pool.{h,c}):
   - Global pool shared by all size classes
   - shared_pool_acquire_slab() - Get free slab for class_idx
   - shared_pool_release_slab() - Return slab when empty
   - Per-class hints for fast path optimization

3. **Integration** (23 files modified):
   - Updated all ss->size_class → meta->class_idx
   - Updated all meta->owner_tid → meta->owner_tid_low
   - superslab_refill() now uses shared pool
   - Free path releases empty slabs back to pool

4. **Build system** (Makefile):
   - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE

## Status: ⚠️ Build OK, Runtime CRASH

**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)

**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42

## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
   - SuperSlab physical layout integration
   - slab_handle.h cleanup
   - Remove old per-class head implementation

## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)

## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile

## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>

											
										
										
											2025-11-13 16:33:03 +09:00
+								// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
-												Tiny: Enable P0 batch refill by default + docs and task update

Summary
- Default P0 ON: Build-time HAKMEM_TINY_P0_BATCH_REFILL=1 remains; runtime gate now defaults to ON
  (HAKMEM_TINY_P0_ENABLE unset or not '0'). Kill switch preserved via HAKMEM_TINY_P0_DISABLE=1.
- Fix critical bug: After freelist→SLL batch splice, increment TinySlabMeta::used by 'from_freelist'
  to mirror non-P0 behavior (prevents under-accounting and follow-on carve invariants from breaking).
- Add low-overhead A/B toggles for triage: HAKMEM_TINY_P0_NO_DRAIN (skip remote drain),
  HAKMEM_TINY_P0_LOG (emit [P0_COUNTER_OK/MISMATCH] based on total_active_blocks delta).
- Keep linear carve fail-fast guards across simple/general/TLS-bump paths.

Perf (1T, 100k×256B)
- P0 OFF: ~2.73M ops/s (stable)
- P0 ON (no drain): ~2.45M ops/s
- P0 ON (normal drain): ~2.76M ops/s (fastest)

Known
- Rare [P0_COUNTER_MISMATCH] warnings persist (non-fatal). Continue auditing active/used
  balance around batch freelist splice and remote drain splice.

Docs
- Add docs/TINY_P0_BATCH_REFILL.md (runtime switches, behavior, perf notes).
- Update CURRENT_TASK.md with Tiny P0 status (default ON) and next steps.

											
										
										
											2025-11-09 22:12:34 +09:00
+								static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
 								#else
-												Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)

## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).

## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
   - Added class_idx to TinySlabMeta (per-slab dynamic class)
   - Removed size_class from SuperSlab (no longer per-SuperSlab)
   - Changed owner_tid (16-bit) → owner_tid_low (8-bit)

2. **Shared Pool** (hakmem_shared_pool.{h,c}):
   - Global pool shared by all size classes
   - shared_pool_acquire_slab() - Get free slab for class_idx
   - shared_pool_release_slab() - Return slab when empty
   - Per-class hints for fast path optimization

3. **Integration** (23 files modified):
   - Updated all ss->size_class → meta->class_idx
   - Updated all meta->owner_tid → meta->owner_tid_low
   - superslab_refill() now uses shared pool
   - Free path releases empty slabs back to pool

4. **Build system** (Makefile):
   - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE

## Status: ⚠️ Build OK, Runtime CRASH

**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)

**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42

## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
   - SuperSlab physical layout integration
   - slab_handle.h cleanup
   - Remove old per-class head implementation

## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)

## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile

## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>

											
										
										
											2025-11-13 16:33:03 +09:00
+								// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
 								// Only a single implementation exists there; declare here for callers.
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
 								int sll_refill_small_from_ss(int class_idx, int max_take);
 								#else
 								static inline int sll_refill_small_from_ss(int class_idx, int max_take);
 								#endif
-												Tiny: Enable P0 batch refill by default + docs and task update

Summary
- Default P0 ON: Build-time HAKMEM_TINY_P0_BATCH_REFILL=1 remains; runtime gate now defaults to ON
  (HAKMEM_TINY_P0_ENABLE unset or not '0'). Kill switch preserved via HAKMEM_TINY_P0_DISABLE=1.
- Fix critical bug: After freelist→SLL batch splice, increment TinySlabMeta::used by 'from_freelist'
  to mirror non-P0 behavior (prevents under-accounting and follow-on carve invariants from breaking).
- Add low-overhead A/B toggles for triage: HAKMEM_TINY_P0_NO_DRAIN (skip remote drain),
  HAKMEM_TINY_P0_LOG (emit [P0_COUNTER_OK/MISMATCH] based on total_active_blocks delta).
- Keep linear carve fail-fast guards across simple/general/TLS-bump paths.

Perf (1T, 100k×256B)
- P0 OFF: ~2.73M ops/s (stable)
- P0 ON (no drain): ~2.45M ops/s
- P0 ON (normal drain): ~2.76M ops/s (fastest)

Known
- Rare [P0_COUNTER_MISMATCH] warnings persist (non-fatal). Continue auditing active/used
  balance around batch freelist splice and remote drain splice.

Docs
- Add docs/TINY_P0_BATCH_REFILL.md (runtime switches, behavior, perf notes).
- Update CURRENT_TASK.md with Tiny P0 status (default ON) and next steps.

											
										
										
											2025-11-09 22:12:34 +09:00
+								#endif
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
 								static void* __attribute__((cold, noinline)) tiny_slow_alloc_fast(int class_idx);
 								static inline void tiny_remote_drain_owner(struct TinySlab* slab);
 								static void tiny_remote_drain_locked(struct TinySlab* slab);
 								// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
 								// without any refill or slow-path work. Returns NULL on miss.
 								/* moved below TinyTLSSlab definition */
 								// Step 3d: Forced inlining for readability + performance (306M target)
 								__attribute__((always_inline))
 								static inline void* hak_tiny_alloc_wrapper(int class_idx);
 								// Helpers for SuperSlab active block accounting (atomic, saturating dec)
-												Refactor: Extract 4 safe Box modules from hakmem_tiny.c (-73% total reduction)

Conservative refactoring with Task-sensei's safety analysis.

## Changes

**hakmem_tiny.c**: 616 → 562 lines (-54 lines, -9% this phase)
**Total reduction**: 2081 → 562 lines (-1519 lines, -73% cumulative) 🏆

## Extracted Modules (4 new LOW-risk boxes)

9. **ss_active_box** (6 lines)
   - ss_active_add() - atomic add to active counter
   - ss_active_inc() - atomic increment active counter
   - Pure utility functions, no dependencies
   - Risk: LOW

10. **eventq_box** (32 lines)
   - hak_thread_id16() - thread ID compression
   - eventq_push_ex() - event queue push with sampling
   - Intelligence/telemetry helpers
   - Risk: LOW

11. **sll_cap_box** (12 lines)
   - sll_cap_for_class() - SLL capacity policy
   - Hot classes get multiplier × mag_cap
   - Cold classes get mag_cap / 2
   - Risk: LOW

12. **ultra_batch_box** (20 lines)
   - g_ultra_batch_override[] - batch size overrides
   - g_ultra_sll_cap_override[] - SLL capacity overrides
   - ultra_batch_for_class() - batch size policy
   - Risk: LOW

## Cumulative Progress (12 boxes total)

**Phase 1** (5 boxes): 2081 → 995 lines (-52%)
**Phase 2** (3 boxes): 995 → 616 lines (-38%)
**Phase 3** (4 boxes): 616 → 562 lines (-9%)

**All 12 boxes**:
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)
9. ss_active_box (6 lines) ✨
10. eventq_box (32 lines) ✨
11. sll_cap_box (12 lines) ✨
12. ultra_batch_box (20 lines) ✨

**Total extracted**: 1,575 lines across 12 coherent modules
**Remaining core**: 562 lines (highly focused)

## Safety Approach

- Task-sensei performed deep dependency analysis
- Extracted only LOW-risk candidates
- All dependencies verified at compile time
- Forward declarations already present
- No circular dependencies
- Build tested after each extraction ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 03:20:42 +09:00
 								// SuperSlab Active Counter Helpers - EXTRACTED to hakmem_tiny_ss_active_box.inc
 								#include "hakmem_tiny_ss_active_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)
-												Tiny: add per-class refill count tuning infrastructure (ChatGPT)

External AI (ChatGPT Pro) implemented hierarchical refill count tuning:
- Move getenv() from hot path to init (performance hygiene)
- Add per-class granularity: global → hot/mid → per-class precedence
- Environment variables:
  * HAKMEM_TINY_REFILL_COUNT (global default)
  * HAKMEM_TINY_REFILL_COUNT_HOT (classes 0-3)
  * HAKMEM_TINY_REFILL_COUNT_MID (classes 4-7)
  * HAKMEM_TINY_REFILL_COUNT_C{0..7} (per-class override)

Performance impact: Neutral (no tuning applied yet, default=16)
- Larson 4-thread: 4.19M ops/s (unchanged)
- No measurable overhead from init-time parsing

Code quality improvement:
- Better separation: hot path reads plain ints (no syscalls)
- Future-proof: enables A/B testing per size class
- Documentation: ENV_VARS.md updated

Note: Per Ultrathink's advice, further tuning deferred until bottleneck
visualization (superslab_refill branch analysis) is complete.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <external-ai@openai.com>

											
										
										
											2025-11-05 17:45:11 +09:00
+								// Front refill count global config (declare before init.inc uses them)
 								extern int g_refill_count_global;
 								extern int g_refill_count_hot;
 								extern int g_refill_count_mid;
 								extern int g_refill_count_class[TINY_NUM_CLASSES];
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Step 3d: Forced inlining for slow path (maintain monolithic performance)
 								// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
 								#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
 								void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
 								#else
 								static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
 								#endif
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ---------------------------------------------------------------------------
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								// Box: adopt_gate_try (implementation moved from header for robust linkage)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ---------------------------------------------------------------------------
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								#include "box/adopt_gate_box.h"
-												Boxify superslab registry, add bench profile, and document C7 hotpath experiments

											
										
										
											2025-12-07 03:12:27 +09:00
+								#include "box/super_reg_box.h"
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								extern int g_super_reg_class_size[TINY_NUM_CLASSES];
 								extern unsigned long long g_adopt_gate_calls[];
 								extern unsigned long long g_adopt_gate_success[];
 								extern unsigned long long g_reg_scan_attempts[];
 								extern unsigned long long g_reg_scan_hits[];
 								SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
 								    g_adopt_gate_calls[class_idx]++;
 								    ROUTE_MARK(13);
 								    SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
 								    if (ss) { g_adopt_gate_success[class_idx]++; return ss; }
 								    g_reg_scan_attempts[class_idx]++;
 								    int reg_size = g_super_reg_class_size[class_idx];
-												Boxify superslab registry, add bench profile, and document C7 hotpath experiments

											
										
										
											2025-12-07 03:12:27 +09:00
+								    int reg_cap = super_reg_effective_per_class();
 								    if (reg_cap > 0 && reg_size > reg_cap) {
 								        reg_size = reg_cap;
 								    }
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								    int scan_limit = tiny_reg_scan_max();
 								    if (scan_limit > reg_size) scan_limit = reg_size;
 								    uint32_t self_tid = tiny_self_u32();
-												Tiny: fix header/stride mismatch and harden refill paths

- Root cause: header-based class indexing (HEADER_CLASSIDX=1) wrote a 1-byte
  header during allocation, but linear carve/refill and initial slab capacity
  still used bare class block sizes. This mismatch could overrun slab usable
  space and corrupt freelists, causing reproducible SEGV at ~100k iters.

Changes
- Superslab: compute capacity with effective stride (block_size + header for
  classes 0..6; class7 remains headerless) in superslab_init_slab(). Add a
  debug-only bound check in superslab_alloc_from_slab() to fail fast if carve
  would exceed usable bytes.
- Refill (non-P0 and P0): use header-aware stride for all linear carving and
  TLS window bump operations. Ensure alignment/validation in tiny_refill_opt.h
  also uses stride, not raw class size.
- Drain: keep existing defense-in-depth for remote sentinel and sanitize nodes
  before splicing into freelist (already present).

Notes
- This unifies the memory layout across alloc/linear-carve/refill with a single
  stride definition and keeps class7 (1024B) headerless as designed.
- Debug builds add fail-fast checks; release builds remain lean.

Next
- Re-run Tiny benches (256/1024B) in debug to confirm stability, then in
  release. If any remaining crash persists, bisect with HAKMEM_TINY_P0_BATCH_REFILL=0
  to isolate P0 batch carve, and continue reducing branch-miss as planned.

											
										
										
											2025-11-09 18:55:50 +09:00
+								    // Local helper (mirror adopt_bind_if_safe) to avoid including alloc inline here
 								    auto int adopt_bind_if_safe_local(TinyTLSSlab* tls_l, SuperSlab* ss, int slab_idx, int class_idx_l) {
 								        uint32_t self_tid = tiny_self_u32();
 								        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
 								        if (!slab_is_valid(&h)) return 0;
 								        slab_drain_remote_full(&h);
 								        if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
 								            tiny_tls_bind_slab(tls_l, h.ss, h.slab_idx);
 								            slab_release(&h);
 								            return 1;
 								        }
 								        slab_release(&h);
 								        return 0;
 								    }
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								    for (int i = 0; i < scan_limit; i++) {
-												Boxify superslab registry, add bench profile, and document C7 hotpath experiments

											
										
										
											2025-12-07 03:12:27 +09:00
+								        SuperSlab* cand = super_reg_by_class_at(class_idx, i);
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								        if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;
 								        // Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
 								        uint32_t mask = cand->nonempty_mask;
 								        // Fallback to atomic freelist_mask for cross-thread visibility
 								        if (mask == 0) {
 								            mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
 								        }
 								        if (mask == 0) continue;  // No visible freelists in this SS
 								        int cap = ss_slabs_capacity(cand);
 								        while (mask) {
 								            int sidx = __builtin_ctz(mask);
-												Tiny: fix header/stride mismatch and harden refill paths

- Root cause: header-based class indexing (HEADER_CLASSIDX=1) wrote a 1-byte
  header during allocation, but linear carve/refill and initial slab capacity
  still used bare class block sizes. This mismatch could overrun slab usable
  space and corrupt freelists, causing reproducible SEGV at ~100k iters.

Changes
- Superslab: compute capacity with effective stride (block_size + header for
  classes 0..6; class7 remains headerless) in superslab_init_slab(). Add a
  debug-only bound check in superslab_alloc_from_slab() to fail fast if carve
  would exceed usable bytes.
- Refill (non-P0 and P0): use header-aware stride for all linear carving and
  TLS window bump operations. Ensure alignment/validation in tiny_refill_opt.h
  also uses stride, not raw class size.
- Drain: keep existing defense-in-depth for remote sentinel and sanitize nodes
  before splicing into freelist (already present).

Notes
- This unifies the memory layout across alloc/linear-carve/refill with a single
  stride definition and keeps class7 (1024B) headerless as designed.
- Debug builds add fail-fast checks; release builds remain lean.

Next
- Re-run Tiny benches (256/1024B) in debug to confirm stability, then in
  release. If any remaining crash persists, bisect with HAKMEM_TINY_P0_BATCH_REFILL=0
  to isolate P0 batch carve, and continue reducing branch-miss as planned.

											
										
										
											2025-11-09 18:55:50 +09:00
+								            mask &= (mask - 1);
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								            if (sidx >= cap) continue;
-												Tiny: fix header/stride mismatch and harden refill paths

- Root cause: header-based class indexing (HEADER_CLASSIDX=1) wrote a 1-byte
  header during allocation, but linear carve/refill and initial slab capacity
  still used bare class block sizes. This mismatch could overrun slab usable
  space and corrupt freelists, causing reproducible SEGV at ~100k iters.

Changes
- Superslab: compute capacity with effective stride (block_size + header for
  classes 0..6; class7 remains headerless) in superslab_init_slab(). Add a
  debug-only bound check in superslab_alloc_from_slab() to fail fast if carve
  would exceed usable bytes.
- Refill (non-P0 and P0): use header-aware stride for all linear carving and
  TLS window bump operations. Ensure alignment/validation in tiny_refill_opt.h
  also uses stride, not raw class size.
- Drain: keep existing defense-in-depth for remote sentinel and sanitize nodes
  before splicing into freelist (already present).

Notes
- This unifies the memory layout across alloc/linear-carve/refill with a single
  stride definition and keeps class7 (1024B) headerless as designed.
- Debug builds add fail-fast checks; release builds remain lean.

Next
- Re-run Tiny benches (256/1024B) in debug to confirm stability, then in
  release. If any remaining crash persists, bisect with HAKMEM_TINY_P0_BATCH_REFILL=0
  to isolate P0 batch carve, and continue reducing branch-miss as planned.

											
										
										
											2025-11-09 18:55:50 +09:00
+								            if (adopt_bind_if_safe_local(tls, cand, sidx, class_idx)) {
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								                g_adopt_gate_success[class_idx]++;
 								                g_reg_scan_hits[class_idx]++;
 								                ROUTE_MARK(14); ROUTE_COMMIT(class_idx, 0x07);
-												Tiny: fix header/stride mismatch and harden refill paths

- Root cause: header-based class indexing (HEADER_CLASSIDX=1) wrote a 1-byte
  header during allocation, but linear carve/refill and initial slab capacity
  still used bare class block sizes. This mismatch could overrun slab usable
  space and corrupt freelists, causing reproducible SEGV at ~100k iters.

Changes
- Superslab: compute capacity with effective stride (block_size + header for
  classes 0..6; class7 remains headerless) in superslab_init_slab(). Add a
  debug-only bound check in superslab_alloc_from_slab() to fail fast if carve
  would exceed usable bytes.
- Refill (non-P0 and P0): use header-aware stride for all linear carving and
  TLS window bump operations. Ensure alignment/validation in tiny_refill_opt.h
  also uses stride, not raw class size.
- Drain: keep existing defense-in-depth for remote sentinel and sanitize nodes
  before splicing into freelist (already present).

Notes
- This unifies the memory layout across alloc/linear-carve/refill with a single
  stride definition and keeps class7 (1024B) headerless as designed.
- Debug builds add fail-fast checks; release builds remain lean.

Next
- Re-run Tiny benches (256/1024B) in debug to confirm stability, then in
  release. If any remaining crash persists, bisect with HAKMEM_TINY_P0_BATCH_REFILL=0
  to isolate P0 batch carve, and continue reducing branch-miss as planned.

											
										
										
											2025-11-09 18:55:50 +09:00
+								                return cand;
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								            }
 								        }
 								    }
 								    return NULL;
 								}
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								// Global State - EXTRACTED to hakmem_tiny_globals_box.inc
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								#include "hakmem_tiny_globals_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								#include "hakmem_tiny_publish_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)
 								// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
-												Implement Phantom typing for Tiny FastCache layer

Refactor FastCache and TLS cache APIs to use Phantom types (hak_base_ptr_t)
for compile-time type safety, preventing BASE/USER pointer confusion.

Changes:
1. core/hakmem_tiny_fastcache.inc.h:
   - fastcache_pop() returns hak_base_ptr_t instead of void*
   - fastcache_push() accepts hak_base_ptr_t instead of void*

2. core/hakmem_tiny.c:
   - Updated forward declarations to match new signatures

3. core/tiny_alloc_fast.inc.h, core/hakmem_tiny_alloc.inc:
   - Alloc paths now use hak_base_ptr_t for cache operations
   - BASE->USER conversion via HAK_RET_ALLOC macro

4. core/hakmem_tiny_refill.inc.h, core/refill/ss_refill_fc.h:
   - Refill paths properly handle BASE pointer types
   - Fixed: Removed unnecessary HAK_BASE_FROM_RAW() in ss_refill_fc.h line 176

5. core/hakmem_tiny_free.inc, core/tiny_free_magazine.inc.h:
   - Free paths convert USER->BASE before cache push
   - USER->BASE conversion via HAK_USER_TO_BASE or ptr_user_to_base()

6. core/hakmem_tiny_legacy_slow_box.inc:
   - Legacy path properly wraps pointers for cache API

Benefits:
- Type safety at compile time (in debug builds)
- Zero runtime overhead (debug builds only, release builds use typedef=void*)
- All BASE->USER conversions verified via Task analysis
- Prevents pointer type confusion bugs

Testing:
- Build: SUCCESS (all 9 files)
- Smoke test: PASS (sh8bench runs to completion)
- Conversion path verification: 3/3 paths correct

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 11:05:06 +09:00
+								static inline hak_base_ptr_t tiny_fast_pop(int class_idx);
 								static inline int tiny_fast_push(int class_idx, hak_base_ptr_t ptr);
 								static inline hak_base_ptr_t fastcache_pop(int class_idx);
 								static inline int fastcache_push(int class_idx, hak_base_ptr_t ptr);
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
 								// 88 lines (lines 407-494)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Comprehensive legacy cleanup and architecture consolidation

Summary of Changes:

MOVED TO ARCHIVE:
- core/hakmem_tiny_legacy_slow_box.inc → archive/
  * Slow path legacy code preserved for reference
  * Superseded by Gatekeeper Box architecture

- core/superslab_allocate.c → archive/superslab_allocate_legacy.c
  * Legacy SuperSlab allocation implementation
  * Functionality integrated into new Box system

- core/superslab_head.c → archive/superslab_head_legacy.c
  * Legacy slab head management
  * Refactored through Box architecture

REMOVED DEAD CODE:
- Eliminated unused allocation policy variants from ss_allocation_box.c
  * Reduced from 127+ lines of conditional logic to focused implementation
  * Removed: old policy branches, unused allocation strategies
  * Kept: current Box-based allocation path

ADDED NEW INFRASTRUCTURE:
- core/superslab_head_stub.c (41 lines)
  * Minimal stub for backward compatibility
  * Delegates to new architecture

- Enhanced core/superslab_cache.c (75 lines added)
  * Added missing API functions for cache management
  * Proper interface for SuperSlab cache integration

REFACTORED CORE SYSTEMS:
- core/hakmem_super_registry.c
  * Moved registration logic from scattered locations
  * Centralized SuperSlab registry management

- core/hakmem_tiny.c
  * Removed 27 lines of redundant initialization
  * Simplified through Box architecture

- core/hakmem_tiny_alloc.inc
  * Streamlined allocation path to use Gatekeeper
  * Removed legacy decision logic

- core/box/ss_allocation_box.c/h
  * Dramatically simplified allocation policy
  * Removed conditional branches for unused strategies
  * Focused on current Box-based approach

BUILD SYSTEM:
- Updated Makefile for archive structure
- Removed obsolete object file references
- Maintained build compatibility

SAFETY & TESTING:
- All deletions verified: no broken references
- Build verification: RELEASE=0 and RELEASE=1 pass
- Smoke tests: 100% pass rate
- Functional verification: allocation/free intact

Architecture Consolidation:
Before: Multiple overlapping allocation paths with legacy code branches
After:  Single unified path through Gatekeeper Boxes with clear architecture

Benefits:
- Reduced code size and complexity
- Improved maintainability
- Single source of truth for allocation logic
- Better diagnostic/observability hooks
- Foundation for future optimizations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 14:22:48 +09:00
+								// Legacy Slow Allocation Path - ARCHIVED
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Comprehensive legacy cleanup and architecture consolidation

Summary of Changes:

MOVED TO ARCHIVE:
- core/hakmem_tiny_legacy_slow_box.inc → archive/
  * Slow path legacy code preserved for reference
  * Superseded by Gatekeeper Box architecture

- core/superslab_allocate.c → archive/superslab_allocate_legacy.c
  * Legacy SuperSlab allocation implementation
  * Functionality integrated into new Box system

- core/superslab_head.c → archive/superslab_head_legacy.c
  * Legacy slab head management
  * Refactored through Box architecture

REMOVED DEAD CODE:
- Eliminated unused allocation policy variants from ss_allocation_box.c
  * Reduced from 127+ lines of conditional logic to focused implementation
  * Removed: old policy branches, unused allocation strategies
  * Kept: current Box-based allocation path

ADDED NEW INFRASTRUCTURE:
- core/superslab_head_stub.c (41 lines)
  * Minimal stub for backward compatibility
  * Delegates to new architecture

- Enhanced core/superslab_cache.c (75 lines added)
  * Added missing API functions for cache management
  * Proper interface for SuperSlab cache integration

REFACTORED CORE SYSTEMS:
- core/hakmem_super_registry.c
  * Moved registration logic from scattered locations
  * Centralized SuperSlab registry management

- core/hakmem_tiny.c
  * Removed 27 lines of redundant initialization
  * Simplified through Box architecture

- core/hakmem_tiny_alloc.inc
  * Streamlined allocation path to use Gatekeeper
  * Removed legacy decision logic

- core/box/ss_allocation_box.c/h
  * Dramatically simplified allocation policy
  * Removed conditional branches for unused strategies
  * Focused on current Box-based approach

BUILD SYSTEM:
- Updated Makefile for archive structure
- Removed obsolete object file references
- Maintained build compatibility

SAFETY & TESTING:
- All deletions verified: no broken references
- Build verification: RELEASE=0 and RELEASE=1 pass
- Smoke tests: 100% pass rate
- Functional verification: allocation/free intact

Architecture Consolidation:
Before: Multiple overlapping allocation paths with legacy code branches
After:  Single unified path through Gatekeeper Boxes with clear architecture

Benefits:
- Reduced code size and complexity
- Improved maintainability
- Single source of truth for allocation logic
- Better diagnostic/observability hooks
- Foundation for future optimizations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 14:22:48 +09:00
+								// Note: tiny_slow_alloc_fast() and related legacy slow path implementation
 								// have been moved to archive/hakmem_tiny_legacy_slow_box.inc and are no
 								// longer compiled. The current slow path uses Box化された hak_tiny_alloc_slow().
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 3 more Box modules from hakmem_tiny.c (-70% total reduction)

Continue hakmem_tiny.c refactoring with 3 large module extractions.

## Changes

**hakmem_tiny.c**: 995 → 616 lines (-379 lines, -38% this phase)
**Total reduction**: 2081 → 616 lines (-1465 lines, -70% cumulative) 🏆

## Extracted Modules (3 new boxes)

6. **tls_state_box** (224 lines)
   - TLS SLL enable flags and configuration
   - TLS canaries and SLL array definitions
   - Debug counters (path, ultra, allocation)
   - Frontend/backend configuration
   - TLS thread ID caching helpers
   - Frontend hit/miss counters
   - HotMag, QuickSlot, Ultra-front configuration
   - Helper functions (is_hot_class, tiny_optional_push)
   - Intelligence system helpers

7. **legacy_slow_box** (96 lines)
   - tiny_slow_alloc_fast() function (cold/unused)
   - Legacy slab-based allocation with refill
   - TLS cache/fast cache refill from slabs
   - Remote drain handling
   - List management (move to full/free lists)
   - Marked __attribute__((cold, noinline, unused))

8. **slab_lookup_box** (77 lines)
   - registry_lookup() - O(1) hash-based lookup
   - hak_tiny_owner_slab() - public API for slab discovery
   - Linear probing search with atomic owner access
   - O(N) fallback for non-registry mode
   - Safety validation for membership checking

## Cumulative Progress (8 boxes total)

**Previously extracted** (Phase 1):
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)

**This phase** (Phase 2):
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)

**Total extracted**: 1,505 lines across 8 coherent modules
**Remaining core**: 616 lines (well-organized, focused)

## Benefits

- **Readability**: 2k monolith → focused 616-line core
- **Maintainability**: Each box has single responsibility
- **Organization**: TLS state, legacy code, lookup utilities separated
- **Build**: All modules compile successfully ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:23:59 +09:00
+								// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 3 more Box modules from hakmem_tiny.c (-70% total reduction)

Continue hakmem_tiny.c refactoring with 3 large module extractions.

## Changes

**hakmem_tiny.c**: 995 → 616 lines (-379 lines, -38% this phase)
**Total reduction**: 2081 → 616 lines (-1465 lines, -70% cumulative) 🏆

## Extracted Modules (3 new boxes)

6. **tls_state_box** (224 lines)
   - TLS SLL enable flags and configuration
   - TLS canaries and SLL array definitions
   - Debug counters (path, ultra, allocation)
   - Frontend/backend configuration
   - TLS thread ID caching helpers
   - Frontend hit/miss counters
   - HotMag, QuickSlot, Ultra-front configuration
   - Helper functions (is_hot_class, tiny_optional_push)
   - Intelligence system helpers

7. **legacy_slow_box** (96 lines)
   - tiny_slow_alloc_fast() function (cold/unused)
   - Legacy slab-based allocation with refill
   - TLS cache/fast cache refill from slabs
   - Remote drain handling
   - List management (move to full/free lists)
   - Marked __attribute__((cold, noinline, unused))

8. **slab_lookup_box** (77 lines)
   - registry_lookup() - O(1) hash-based lookup
   - hak_tiny_owner_slab() - public API for slab discovery
   - Linear probing search with atomic owner access
   - O(N) fallback for non-registry mode
   - Safety validation for membership checking

## Cumulative Progress (8 boxes total)

**Previously extracted** (Phase 1):
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)

**This phase** (Phase 2):
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)

**Total extracted**: 1,505 lines across 8 coherent modules
**Remaining core**: 616 lines (well-organized, focused)

## Benefits

- **Readability**: 2k monolith → focused 616-line core
- **Maintainability**: Each box has single responsibility
- **Organization**: TLS state, legacy code, lookup utilities separated
- **Build**: All modules compile successfully ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:23:59 +09:00
+								#include "hakmem_tiny_tls_state_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								#include "hakmem_tiny_intel.inc"
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED: static int get_rss_kb_self(void) {
 								// EXTRACTED:     FILE* f = fopen("/proc/self/status", "r");
 								// EXTRACTED:     if (!f) return 0;
 								// EXTRACTED:     char buf[256];
 								// EXTRACTED:     int kb = 0;
 								// EXTRACTED:     while (fgets(buf, sizeof(buf), f)) {
 								// EXTRACTED:         if (strncmp(buf, "VmRSS:", 6) == 0) {
 								// EXTRACTED:             char* p = buf;
 								// EXTRACTED:             while (*p && (*p < '0' || *p > '9')) {
 								// EXTRACTED:                 p++;
 								// EXTRACTED:             }
 								// EXTRACTED:             kb = atoi(p);
 								// EXTRACTED:             break;
 								// EXTRACTED:         }
 								// EXTRACTED:     }
 								// EXTRACTED:     fclose(f);
 								// EXTRACTED:     return kb;
 								// EXTRACTED: }
 								// Miss時にマガジンへ大量リフィルせず、1個だけ確保して即返すオプション
 								// Env: HAKMEM_TINY_REFILL_ONE_ON_MISS=1 で有効（デフォルト: 0）
 								int g_refill_one_on_miss = 0;
 								// Frontend fill target per class (adaptive)
 								// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
 								_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];
-												Adaptive CAS: Single-threaded fast path optimization

PROBLEM:
- Atomic freelist (Phase 1) introduced 3-5x overhead in hot path
- CAS loop overhead: 16-27 cycles vs 4-6 cycles (non-atomic)
- Single-threaded workloads pay MT safety cost unnecessarily

SOLUTION:
- Runtime thread detection with g_hakmem_active_threads counter
- Single-threaded (1T): Skip CAS, use relaxed load/store (fast)
- Multi-threaded (2+T): Full CAS loop for MT safety

IMPLEMENTATION:
1. core/hakmem_tiny.c:240 - Added g_hakmem_active_threads atomic counter
2. core/hakmem_tiny.c:248 - Added hakmem_thread_register() for per-thread init
3. core/hakmem_tiny.h:160-163 - Exported thread counter and registration API
4. core/box/hak_alloc_api.inc.h:34 - Call hakmem_thread_register() on first alloc
5. core/box/slab_freelist_atomic.h:58-68 - Adaptive CAS in pop_lockfree()
6. core/box/slab_freelist_atomic.h:118-126 - Adaptive CAS in push_lockfree()

DESIGN:
- Thread counter: Incremented on first allocation per thread
- Fast path check: if (num_threads <= 1) → relaxed ops
- Slow path: Full CAS loop (existing Phase 1 implementation)
- Zero overhead when truly single-threaded

PERFORMANCE:
Random Mixed 256B (Single-threaded):
  Before (Phase 1): 16.7M ops/s
  After:            14.9M ops/s (-11%, thread counter overhead)

Larson (Single-threaded):
  Before: 47.9M ops/s
  After:  47.9M ops/s (no change, already fast)

Larson (Multi-threaded 8T):
  Before: 48.8M ops/s
  After:  48.3M ops/s (-1%, within noise)

MT STABILITY:
  1T: 47.9M ops/s ✅
  8T: 48.3M ops/s ✅ (zero crashes, stable)

NOTES:
- Expected Larson improvement (0.80M → 1.80M) not observed
- Larson was already fast (47.9M) in Phase 1
- Possible Task investigation used different benchmark
- Adaptive CAS implementation verified and working correctly

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-22 03:30:47 +09:00
+								// Adaptive CAS: Active thread counter (for single-threaded optimization)
 								// Incremented on thread init, decremented on thread shutdown
 								_Atomic uint32_t g_hakmem_active_threads = 0;
 								// Per-thread registration flag (TLS variable)
 								static __thread int g_thread_registered = 0;
 								// Adaptive CAS: Register current thread (called on first allocation)
 								// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
 								__attribute__((always_inline))
 								inline void hakmem_thread_register(void) {
 								    if (__builtin_expect(g_thread_registered == 0, 0)) {
 								        g_thread_registered = 1;
 								        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
 								    }
 								}
-												ENV Cleanup: Delete Ultra HEAP & BG Remote dead code (-1,096 LOC)

Deleted files (11):
- core/ultra/ directory (6 files: tiny_ultra_heap.*, tiny_ultra_page_arena.*)
- core/front/tiny_ultrafront.h
- core/tiny_ultra_fast.inc.h
- core/hakmem_tiny_ultra_front.inc.h
- core/hakmem_tiny_ultra_simple.inc
- core/hakmem_tiny_ultra_batch_box.inc

Edited files (10):
- core/hakmem_tiny.c: Remove Ultra HEAP #includes, move ultra_batch_for_class()
- core/hakmem_tiny_tls_state_box.inc: Delete TinyUltraFront, g_ultra_simple
- core/hakmem_tiny_phase6_wrappers_box.inc: Delete ULTRA_SIMPLE block
- core/hakmem_tiny_alloc.inc: Delete Ultra-Front code block
- core/hakmem_tiny_init.inc: Delete ULTRA_SIMPLE ENV loading
- core/hakmem_tiny_remote_target.{c,h}: Delete g_bg_remote_enable/batch
- core/tiny_refill.h: Remove BG Remote check (always break)
- core/hakmem_tiny_background.inc: Delete BG Remote drain loop

Deleted ENV variables:
- HAKMEM_TINY_ULTRA_HEAP (build flag, undefined)
- HAKMEM_TINY_ULTRA_L0
- HAKMEM_TINY_ULTRA_HEAP_DUMP
- HAKMEM_TINY_ULTRA_PAGE_DUMP
- HAKMEM_TINY_ULTRA_FRONT
- HAKMEM_TINY_BG_REMOTE (no getenv, dead code)
- HAKMEM_TINY_BG_REMOTE_BATCH (no getenv, dead code)
- HAKMEM_TINY_ULTRA_SIMPLE (references only)

Impact:
- Code reduction: -1,096 lines
- Binary size: 305KB → 304KB (-1KB)
- Build: PASS
- Sanity: 15.69M ops/s (3 runs avg)
- Larson: 1 crash observed (seed 43, likely existing instability)

Notes:
- Ultra HEAP never compiled (#if HAKMEM_TINY_ULTRA_HEAP undefined)
- BG Remote variables never initialized (g_bg_remote_enable always 0)
- Ultra SLIM (ultra_slim_alloc_box.h) preserved (active 4-layer path)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-27 04:35:47 +09:00
+								// SLL capacity override array (moved from deleted hakmem_tiny_ultra_batch_box.inc)
 								static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
 								static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};
 								// Helper function for batch size (moved from deleted hakmem_tiny_ultra_batch_box.inc)
 								static inline int ultra_batch_for_class(int class_idx) {
 								    int ov = g_ultra_batch_override[class_idx];
 								    if (ov > 0) return ov;
 								    switch (class_idx) {
 								        case 0: return 64;            // 8B
 								        case 1: return 96;            // 16B
 								        case 2: return 96;            // 32B
 								        case 3: return 224;           // 64B
 								        case 4: return 96;            // 128B
 								        case 5: return 64;            // 256B
 								        case 6: return 64;            // 512B
 								        default: return 32;           // 1024B and others
 								    }
 								}
 								// Helper function for SLL capacity (moved from deleted hakmem_tiny_ultra_batch_box.inc)
 								static inline int ultra_sll_cap_for_class(int class_idx) {
 								    int ov = g_ultra_sll_cap_override[class_idx];
 								    if (ov > 0) return ov;
 								    switch (class_idx) {
 								        case 0: return 256;   // 8B
 								        case 1: return 384;   // 16B
 								        case 2: return 384;   // 32B
 								        case 3: return 768;   // 64B
 								        case 4: return 256;   // 128B
 								        default: return 128;  // others
 								    }
 								}
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };
-												Refactor: Extract 4 safe Box modules from hakmem_tiny.c (-73% total reduction)

Conservative refactoring with Task-sensei's safety analysis.

## Changes

**hakmem_tiny.c**: 616 → 562 lines (-54 lines, -9% this phase)
**Total reduction**: 2081 → 562 lines (-1519 lines, -73% cumulative) 🏆

## Extracted Modules (4 new LOW-risk boxes)

9. **ss_active_box** (6 lines)
   - ss_active_add() - atomic add to active counter
   - ss_active_inc() - atomic increment active counter
   - Pure utility functions, no dependencies
   - Risk: LOW

10. **eventq_box** (32 lines)
   - hak_thread_id16() - thread ID compression
   - eventq_push_ex() - event queue push with sampling
   - Intelligence/telemetry helpers
   - Risk: LOW

11. **sll_cap_box** (12 lines)
   - sll_cap_for_class() - SLL capacity policy
   - Hot classes get multiplier × mag_cap
   - Cold classes get mag_cap / 2
   - Risk: LOW

12. **ultra_batch_box** (20 lines)
   - g_ultra_batch_override[] - batch size overrides
   - g_ultra_sll_cap_override[] - SLL capacity overrides
   - ultra_batch_for_class() - batch size policy
   - Risk: LOW

## Cumulative Progress (12 boxes total)

**Phase 1** (5 boxes): 2081 → 995 lines (-52%)
**Phase 2** (3 boxes): 995 → 616 lines (-38%)
**Phase 3** (4 boxes): 616 → 562 lines (-9%)

**All 12 boxes**:
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)
9. ss_active_box (6 lines) ✨
10. eventq_box (32 lines) ✨
11. sll_cap_box (12 lines) ✨
12. ultra_batch_box (20 lines) ✨

**Total extracted**: 1,575 lines across 12 coherent modules
**Remaining core**: 562 lines (highly focused)

## Safety Approach

- Task-sensei performed deep dependency analysis
- Extracted only LOW-risk candidates
- All dependencies verified at compile time
- Forward declarations already present
- No circular dependencies
- Build tested after each extraction ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 03:20:42 +09:00
+								// Event Queue & Telemetry Helpers - EXTRACTED to hakmem_tiny_eventq_box.inc
 								#include "hakmem_tiny_eventq_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								// Background refill workers and intelligence engine
 								#include "hakmem_tiny_background.inc"
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)
 								// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
 								// without any refill or slow-path work. Returns NULL on miss.
 								static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
 								    if (!g_use_superslab) return NULL;
 								    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
 								    TinySlabMeta* meta = tls->meta;
 								    if (!meta) return NULL;
 								    // Try linear (bump) allocation first when freelist is empty
 								    if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
-												Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)

## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).

## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
   - Added class_idx to TinySlabMeta (per-slab dynamic class)
   - Removed size_class from SuperSlab (no longer per-SuperSlab)
   - Changed owner_tid (16-bit) → owner_tid_low (8-bit)

2. **Shared Pool** (hakmem_shared_pool.{h,c}):
   - Global pool shared by all size classes
   - shared_pool_acquire_slab() - Get free slab for class_idx
   - shared_pool_release_slab() - Return slab when empty
   - Per-class hints for fast path optimization

3. **Integration** (23 files modified):
   - Updated all ss->size_class → meta->class_idx
   - Updated all meta->owner_tid → meta->owner_tid_low
   - superslab_refill() now uses shared pool
   - Free path releases empty slabs back to pool

4. **Build system** (Makefile):
   - Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE

## Status: ⚠️ Build OK, Runtime CRASH

**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)

**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42

## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
   - SuperSlab physical layout integration
   - slab_handle.h cleanup
   - Remove old per-class head implementation

## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)

## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile

## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>

											
										
										
											2025-11-13 16:33:03 +09:00
+								        // Use per-slab class_idx to get stride
 								        size_t block_size = tiny_stride_for_class(meta->class_idx);
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								        void* block = tls->slab_base + ((size_t)meta->used * block_size);
 								        meta->used++;
-												Fix C7 warm/TLS Release path and unify debug instrumentation

											
										
										
											2025-12-05 23:41:01 +09:00
+								        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								        // Track active blocks in SuperSlab for conservative reclamation
 								        ss_active_inc(tls->ss);
 								        return block;
 								    }
 								    // Do not pop freelist here (keep magazine/SLL handling consistent)
 								    return NULL;
 								}
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)
 								// Phase 2C-3: TLS operations module (included after helper function definitions)
 								#include "hakmem_tiny_tls_ops.h"
 								// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: tls_refill_from_tls_slab() - 101 lines
 								// Hot path refill operation, moved to inline function in header
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: tls_list_spill_excess() - 97 lines
 								// Hot path spill operation, moved to inline function in header
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)
 								// SLL capacity policy: for hot tiny classes (0..3), allow larger SLL up to multiplier * mag_cap
 								// for >=4 keep current conservative half (to limit footprint).
-												Refactor: Extract 4 safe Box modules from hakmem_tiny.c (-73% total reduction)

Conservative refactoring with Task-sensei's safety analysis.

## Changes

**hakmem_tiny.c**: 616 → 562 lines (-54 lines, -9% this phase)
**Total reduction**: 2081 → 562 lines (-1519 lines, -73% cumulative) 🏆

## Extracted Modules (4 new LOW-risk boxes)

9. **ss_active_box** (6 lines)
   - ss_active_add() - atomic add to active counter
   - ss_active_inc() - atomic increment active counter
   - Pure utility functions, no dependencies
   - Risk: LOW

10. **eventq_box** (32 lines)
   - hak_thread_id16() - thread ID compression
   - eventq_push_ex() - event queue push with sampling
   - Intelligence/telemetry helpers
   - Risk: LOW

11. **sll_cap_box** (12 lines)
   - sll_cap_for_class() - SLL capacity policy
   - Hot classes get multiplier × mag_cap
   - Cold classes get mag_cap / 2
   - Risk: LOW

12. **ultra_batch_box** (20 lines)
   - g_ultra_batch_override[] - batch size overrides
   - g_ultra_sll_cap_override[] - SLL capacity overrides
   - ultra_batch_for_class() - batch size policy
   - Risk: LOW

## Cumulative Progress (12 boxes total)

**Phase 1** (5 boxes): 2081 → 995 lines (-52%)
**Phase 2** (3 boxes): 995 → 616 lines (-38%)
**Phase 3** (4 boxes): 616 → 562 lines (-9%)

**All 12 boxes**:
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)
9. ss_active_box (6 lines) ✨
10. eventq_box (32 lines) ✨
11. sll_cap_box (12 lines) ✨
12. ultra_batch_box (20 lines) ✨

**Total extracted**: 1,575 lines across 12 coherent modules
**Remaining core**: 562 lines (highly focused)

## Safety Approach

- Task-sensei performed deep dependency analysis
- Extracted only LOW-risk candidates
- All dependencies verified at compile time
- Forward declarations already present
- No circular dependencies
- Build tested after each extraction ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 03:20:42 +09:00
 								// SLL Capacity Policy - EXTRACTED to hakmem_tiny_sll_cap_box.inc
 								#include "hakmem_tiny_sll_cap_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)
-												ENV Cleanup: Delete Ultra HEAP & BG Remote dead code (-1,096 LOC)

Deleted files (11):
- core/ultra/ directory (6 files: tiny_ultra_heap.*, tiny_ultra_page_arena.*)
- core/front/tiny_ultrafront.h
- core/tiny_ultra_fast.inc.h
- core/hakmem_tiny_ultra_front.inc.h
- core/hakmem_tiny_ultra_simple.inc
- core/hakmem_tiny_ultra_batch_box.inc

Edited files (10):
- core/hakmem_tiny.c: Remove Ultra HEAP #includes, move ultra_batch_for_class()
- core/hakmem_tiny_tls_state_box.inc: Delete TinyUltraFront, g_ultra_simple
- core/hakmem_tiny_phase6_wrappers_box.inc: Delete ULTRA_SIMPLE block
- core/hakmem_tiny_alloc.inc: Delete Ultra-Front code block
- core/hakmem_tiny_init.inc: Delete ULTRA_SIMPLE ENV loading
- core/hakmem_tiny_remote_target.{c,h}: Delete g_bg_remote_enable/batch
- core/tiny_refill.h: Remove BG Remote check (always break)
- core/hakmem_tiny_background.inc: Delete BG Remote drain loop

Deleted ENV variables:
- HAKMEM_TINY_ULTRA_HEAP (build flag, undefined)
- HAKMEM_TINY_ULTRA_L0
- HAKMEM_TINY_ULTRA_HEAP_DUMP
- HAKMEM_TINY_ULTRA_PAGE_DUMP
- HAKMEM_TINY_ULTRA_FRONT
- HAKMEM_TINY_BG_REMOTE (no getenv, dead code)
- HAKMEM_TINY_BG_REMOTE_BATCH (no getenv, dead code)
- HAKMEM_TINY_ULTRA_SIMPLE (references only)

Impact:
- Code reduction: -1,096 lines
- Binary size: 305KB → 304KB (-1KB)
- Build: PASS
- Sanity: 15.69M ops/s (3 runs avg)
- Larson: 1 crash observed (seed 43, likely existing instability)

Notes:
- Ultra HEAP never compiled (#if HAKMEM_TINY_ULTRA_HEAP undefined)
- BG Remote variables never initialized (g_bg_remote_enable always 0)
- Ultra SLIM (ultra_slim_alloc_box.h) preserved (active 4-layer path)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-27 04:35:47 +09:00
+								// Ultra-Mode Batch Configuration - REMOVED (dead code cleanup 2025-11-27)
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								#include "hakmem_tiny_remote.inc"
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Internal Helpers
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								// Step 2: Slab Registry Operations
 								// Hash function for slab_base (64KB aligned)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
 								// EXTRACTED:     return (slab_base >> 16) & SLAB_REGISTRY_MASK;
 								// EXTRACTED: }
 								// Register slab in hash table (returns 1 on success, 0 on failure)
 								// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
 								// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
 								// EXTRACTED:     int hash = registry_hash(slab_base);
 								// EXTRACTED:
 								// EXTRACTED:     // Linear probing (max 8 attempts)
 								// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
 								// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
 								// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
 								// EXTRACTED:
 								// EXTRACTED:         if (entry->slab_base == 0) {
 								// EXTRACTED:             // Empty slot found
 								// EXTRACTED:             entry->slab_base = slab_base;
 								// EXTRACTED:             atomic_store_explicit(&entry->owner, owner, memory_order_release);
 								// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
 								// EXTRACTED:             return 1;
 								// EXTRACTED:         }
 								// EXTRACTED:     }
 								// EXTRACTED:
 								// EXTRACTED:     // Registry full (collision limit exceeded)
 								// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
 								// EXTRACTED:     return 0;
 								// EXTRACTED: }
 								// Unregister slab from hash table
 								// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
 								// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
 								// EXTRACTED:     int hash = registry_hash(slab_base);
 								// EXTRACTED:
 								// EXTRACTED:     // Linear probing search
 								// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
 								// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
 								// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
 								// EXTRACTED:
 								// EXTRACTED:         if (entry->slab_base == slab_base) {
 								// EXTRACTED:             // Found - clear entry (atomic store prevents TOCTOU race)
 								// EXTRACTED:             atomic_store_explicit(&entry->owner, NULL, memory_order_release);
 								// EXTRACTED:             entry->slab_base = 0;
 								// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
 								// EXTRACTED:             return;
 								// EXTRACTED:         }
 								// EXTRACTED:
 								// EXTRACTED:         if (entry->slab_base == 0) {
 								// EXTRACTED:             // Empty slot - not found
 								// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
 								// EXTRACTED:             return;
 								// EXTRACTED:         }
 								// EXTRACTED:     }
 								// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
 								// EXTRACTED: }
 								// Lookup slab by base address (O(1) average)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 3 more Box modules from hakmem_tiny.c (-70% total reduction)

Continue hakmem_tiny.c refactoring with 3 large module extractions.

## Changes

**hakmem_tiny.c**: 995 → 616 lines (-379 lines, -38% this phase)
**Total reduction**: 2081 → 616 lines (-1465 lines, -70% cumulative) 🏆

## Extracted Modules (3 new boxes)

6. **tls_state_box** (224 lines)
   - TLS SLL enable flags and configuration
   - TLS canaries and SLL array definitions
   - Debug counters (path, ultra, allocation)
   - Frontend/backend configuration
   - TLS thread ID caching helpers
   - Frontend hit/miss counters
   - HotMag, QuickSlot, Ultra-front configuration
   - Helper functions (is_hot_class, tiny_optional_push)
   - Intelligence system helpers

7. **legacy_slow_box** (96 lines)
   - tiny_slow_alloc_fast() function (cold/unused)
   - Legacy slab-based allocation with refill
   - TLS cache/fast cache refill from slabs
   - Remote drain handling
   - List management (move to full/free lists)
   - Marked __attribute__((cold, noinline, unused))

8. **slab_lookup_box** (77 lines)
   - registry_lookup() - O(1) hash-based lookup
   - hak_tiny_owner_slab() - public API for slab discovery
   - Linear probing search with atomic owner access
   - O(N) fallback for non-registry mode
   - Safety validation for membership checking

## Cumulative Progress (8 boxes total)

**Previously extracted** (Phase 1):
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)

**This phase** (Phase 2):
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)

**Total extracted**: 1,505 lines across 8 coherent modules
**Remaining core**: 616 lines (well-organized, focused)

## Benefits

- **Readability**: 2k monolith → focused 616-line core
- **Maintainability**: Each box has single responsibility
- **Organization**: TLS state, legacy code, lookup utilities separated
- **Build**: All modules compile successfully ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:23:59 +09:00
+								// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 3 more Box modules from hakmem_tiny.c (-70% total reduction)

Continue hakmem_tiny.c refactoring with 3 large module extractions.

## Changes

**hakmem_tiny.c**: 995 → 616 lines (-379 lines, -38% this phase)
**Total reduction**: 2081 → 616 lines (-1465 lines, -70% cumulative) 🏆

## Extracted Modules (3 new boxes)

6. **tls_state_box** (224 lines)
   - TLS SLL enable flags and configuration
   - TLS canaries and SLL array definitions
   - Debug counters (path, ultra, allocation)
   - Frontend/backend configuration
   - TLS thread ID caching helpers
   - Frontend hit/miss counters
   - HotMag, QuickSlot, Ultra-front configuration
   - Helper functions (is_hot_class, tiny_optional_push)
   - Intelligence system helpers

7. **legacy_slow_box** (96 lines)
   - tiny_slow_alloc_fast() function (cold/unused)
   - Legacy slab-based allocation with refill
   - TLS cache/fast cache refill from slabs
   - Remote drain handling
   - List management (move to full/free lists)
   - Marked __attribute__((cold, noinline, unused))

8. **slab_lookup_box** (77 lines)
   - registry_lookup() - O(1) hash-based lookup
   - hak_tiny_owner_slab() - public API for slab discovery
   - Linear probing search with atomic owner access
   - O(N) fallback for non-registry mode
   - Safety validation for membership checking

## Cumulative Progress (8 boxes total)

**Previously extracted** (Phase 1):
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)

**This phase** (Phase 2):
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)

**Total extracted**: 1,505 lines across 8 coherent modules
**Remaining core**: 616 lines (well-organized, focused)

## Benefits

- **Readability**: 2k monolith → focused 616-line core
- **Maintainability**: Each box has single responsibility
- **Organization**: TLS state, legacy code, lookup utilities separated
- **Build**: All modules compile successfully ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:23:59 +09:00
+								#include "hakmem_tiny_slab_lookup_box.inc"
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								// Function: move_to_full_list() - 20 lines (lines 1104-1123)
 								// Move slab to full list
 								// Function: move_to_free_list() - 20 lines (lines 1126-1145)
 								// Move slab to free list
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Public API
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#include "hakmem_tiny_init.inc"
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// 3-Layer Architecture (2025-11-01 Simplification)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
 								#include "hakmem_tiny_bump.inc.h"
 								// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
 								#include "hakmem_tiny_smallmag.inc.h"
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Comprehensive legacy cleanup and architecture consolidation

Summary of Changes:

MOVED TO ARCHIVE:
- core/hakmem_tiny_legacy_slow_box.inc → archive/
  * Slow path legacy code preserved for reference
  * Superseded by Gatekeeper Box architecture

- core/superslab_allocate.c → archive/superslab_allocate_legacy.c
  * Legacy SuperSlab allocation implementation
  * Functionality integrated into new Box system

- core/superslab_head.c → archive/superslab_head_legacy.c
  * Legacy slab head management
  * Refactored through Box architecture

REMOVED DEAD CODE:
- Eliminated unused allocation policy variants from ss_allocation_box.c
  * Reduced from 127+ lines of conditional logic to focused implementation
  * Removed: old policy branches, unused allocation strategies
  * Kept: current Box-based allocation path

ADDED NEW INFRASTRUCTURE:
- core/superslab_head_stub.c (41 lines)
  * Minimal stub for backward compatibility
  * Delegates to new architecture

- Enhanced core/superslab_cache.c (75 lines added)
  * Added missing API functions for cache management
  * Proper interface for SuperSlab cache integration

REFACTORED CORE SYSTEMS:
- core/hakmem_super_registry.c
  * Moved registration logic from scattered locations
  * Centralized SuperSlab registry management

- core/hakmem_tiny.c
  * Removed 27 lines of redundant initialization
  * Simplified through Box architecture

- core/hakmem_tiny_alloc.inc
  * Streamlined allocation path to use Gatekeeper
  * Removed legacy decision logic

- core/box/ss_allocation_box.c/h
  * Dramatically simplified allocation policy
  * Removed conditional branches for unused strategies
  * Focused on current Box-based approach

BUILD SYSTEM:
- Updated Makefile for archive structure
- Removed obsolete object file references
- Maintained build compatibility

SAFETY & TESTING:
- All deletions verified: no broken references
- Build verification: RELEASE=0 and RELEASE=1 pass
- Smoke tests: 100% pass rate
- Functional verification: allocation/free intact

Architecture Consolidation:
Before: Multiple overlapping allocation paths with legacy code branches
After:  Single unified path through Gatekeeper Boxes with clear architecture

Benefits:
- Reduced code size and complexity
- Improved maintainability
- Single source of truth for allocation logic
- Better diagnostic/observability hooks
- Foundation for future optimizations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 14:22:48 +09:00
+								// Phase 6 Fast Path Option (Metadata Header)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Comprehensive legacy cleanup and architecture consolidation

Summary of Changes:

MOVED TO ARCHIVE:
- core/hakmem_tiny_legacy_slow_box.inc → archive/
  * Slow path legacy code preserved for reference
  * Superseded by Gatekeeper Box architecture

- core/superslab_allocate.c → archive/superslab_allocate_legacy.c
  * Legacy SuperSlab allocation implementation
  * Functionality integrated into new Box system

- core/superslab_head.c → archive/superslab_head_legacy.c
  * Legacy slab head management
  * Refactored through Box architecture

REMOVED DEAD CODE:
- Eliminated unused allocation policy variants from ss_allocation_box.c
  * Reduced from 127+ lines of conditional logic to focused implementation
  * Removed: old policy branches, unused allocation strategies
  * Kept: current Box-based allocation path

ADDED NEW INFRASTRUCTURE:
- core/superslab_head_stub.c (41 lines)
  * Minimal stub for backward compatibility
  * Delegates to new architecture

- Enhanced core/superslab_cache.c (75 lines added)
  * Added missing API functions for cache management
  * Proper interface for SuperSlab cache integration

REFACTORED CORE SYSTEMS:
- core/hakmem_super_registry.c
  * Moved registration logic from scattered locations
  * Centralized SuperSlab registry management

- core/hakmem_tiny.c
  * Removed 27 lines of redundant initialization
  * Simplified through Box architecture

- core/hakmem_tiny_alloc.inc
  * Streamlined allocation path to use Gatekeeper
  * Removed legacy decision logic

- core/box/ss_allocation_box.c/h
  * Dramatically simplified allocation policy
  * Removed conditional branches for unused strategies
  * Focused on current Box-based approach

BUILD SYSTEM:
- Updated Makefile for archive structure
- Removed obsolete object file references
- Maintained build compatibility

SAFETY & TESTING:
- All deletions verified: no broken references
- Build verification: RELEASE=0 and RELEASE=1 pass
- Smoke tests: 100% pass rate
- Functional verification: allocation/free intact

Architecture Consolidation:
Before: Multiple overlapping allocation paths with legacy code branches
After:  Single unified path through Gatekeeper Boxes with clear architecture

Benefits:
- Reduced code size and complexity
- Improved maintainability
- Single source of truth for allocation logic
- Better diagnostic/observability hooks
- Foundation for future optimizations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 14:22:48 +09:00
+								// Phase 6-1.6: Metadata Header (recommended)
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								//   - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
 								//   - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
 								//   - Memory: ~6-12% overhead (8 bytes/allocation)
 								//   - Method: Store pool_type + size_class in 8-byte header
 								//   - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
 								//   - Eliminates: Registry lookups, mid_lookup, owner checks
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								// Forward declarations for Phase 6 alloc/free functions
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
-												Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure

## Major Additions

### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
  * 4-level integrity checking (0-4, compile-time controlled)
  * Priority 1: TLS array bounds validation
  * Priority 2: Freelist pointer validation
  * Priority 3: TLS canary monitoring
  * Priority ALPHA: Slab metadata invariant checking (5 invariants)
  * Atomic statistics tracking (thread-safe)
  * Beautiful BOX_BOUNDARY design pattern

### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
  * Immediate slab 0 binding after expansion
  * TLS state snapshot and restoration
  * Design by Contract (pre/post-conditions, invariants)
  * Thread-safe with mutex protection

### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)

### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)

**Detection**: Box I successfully caught invalid pointer at exact crash point

### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths

## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path

## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)

## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns

## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location

## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-12 02:45:00 +09:00
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								// Phase 6 Wrapper Functions - EXTRACTED to hakmem_tiny_phase6_wrappers_box.inc
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								#include "hakmem_tiny_phase6_wrappers_box.inc"
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								// Layer 1-3: Main allocation function (simplified)
-												Phase 1: Box Theory refactoring + include reduction

Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free

Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic

Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)

Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-06 21:54:12 +09:00
+								// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
 								#ifndef HAKMEM_TINY_USE_NEW_3LAYER
 								#define HAKMEM_TINY_USE_NEW_3LAYER 0  // default OFF (legacy path)
 								#endif
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								#if HAKMEM_TINY_USE_NEW_3LAYER
 								#include "hakmem_tiny_alloc_new.inc"
 								#else
 								// Old 6-7 layer architecture (backup)
 								#include "hakmem_tiny_alloc.inc"
 								#endif
 								#include "hakmem_tiny_slow.inc"
 								// Free path implementations
 								#include "hakmem_tiny_free.inc"
-												Front-Direct implementation: SS→FC direct refill + SLL complete bypass

## Summary

Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)

## New Modules

- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
  - Remote drain → Freelist → Carve priority
  - Header restoration for C1-C6 (NOT C0/C7)
  - ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN

- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition

## Allocation Path (core/tiny_alloc_fast.inc.h)

- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
  - Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
  - Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)

## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)

- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)

## Legacy Sealing

- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry

## ENV Controls

- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)

## Benchmarks (Front-Direct Enabled)

```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
     HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
     HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
     HAKMEM_TINY_BUMP_CHUNK=256

bench_random_mixed (16-1040B random, 200K iter):
  256 slots: 1.44M ops/s (STABLE, 0 SEGV)
  128 slots: 1.44M ops/s (STABLE, 0 SEGV)

bench_fixed_size (fixed size, 200K iter):
  256B: 4.06M ops/s (has debug logs, expected >10M without logs)
  128B: Similar (debug logs affect)
```

## Verification

- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV

## Next Steps

- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)

Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink

											
										
										
											2025-11-14 05:41:49 +09:00
+								// ---- Phase 1: Provide default batch-refill symbol (fallback to small refill)
 								// Allows runtime gate HAKMEM_TINY_REFILL_BATCH=1 without requiring a rebuild.
 								#ifndef HAKMEM_TINY_P0_BATCH_REFILL
 								int sll_refill_small_from_ss(int class_idx, int max_take);
 								__attribute__((weak)) int sll_refill_batch_from_ss(int class_idx, int max_take)
 								{
 								    return sll_refill_small_from_ss(class_idx, max_take);
 								}
 								#endif
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
 								// Public trim and cleanup operation for lifecycle management
 								// Forward decl for internal registry lookup used by ultra safety validation
 								static TinySlab* registry_lookup(uintptr_t slab_base);
-												ENV Cleanup: Delete Ultra HEAP & BG Remote dead code (-1,096 LOC)

Deleted files (11):
- core/ultra/ directory (6 files: tiny_ultra_heap.*, tiny_ultra_page_arena.*)
- core/front/tiny_ultrafront.h
- core/tiny_ultra_fast.inc.h
- core/hakmem_tiny_ultra_front.inc.h
- core/hakmem_tiny_ultra_simple.inc
- core/hakmem_tiny_ultra_batch_box.inc

Edited files (10):
- core/hakmem_tiny.c: Remove Ultra HEAP #includes, move ultra_batch_for_class()
- core/hakmem_tiny_tls_state_box.inc: Delete TinyUltraFront, g_ultra_simple
- core/hakmem_tiny_phase6_wrappers_box.inc: Delete ULTRA_SIMPLE block
- core/hakmem_tiny_alloc.inc: Delete Ultra-Front code block
- core/hakmem_tiny_init.inc: Delete ULTRA_SIMPLE ENV loading
- core/hakmem_tiny_remote_target.{c,h}: Delete g_bg_remote_enable/batch
- core/tiny_refill.h: Remove BG Remote check (always break)
- core/hakmem_tiny_background.inc: Delete BG Remote drain loop

Deleted ENV variables:
- HAKMEM_TINY_ULTRA_HEAP (build flag, undefined)
- HAKMEM_TINY_ULTRA_L0
- HAKMEM_TINY_ULTRA_HEAP_DUMP
- HAKMEM_TINY_ULTRA_PAGE_DUMP
- HAKMEM_TINY_ULTRA_FRONT
- HAKMEM_TINY_BG_REMOTE (no getenv, dead code)
- HAKMEM_TINY_BG_REMOTE_BATCH (no getenv, dead code)
- HAKMEM_TINY_ULTRA_SIMPLE (references only)

Impact:
- Code reduction: -1,096 lines
- Binary size: 305KB → 304KB (-1KB)
- Build: PASS
- Sanity: 15.69M ops/s (3 runs avg)
- Larson: 1 crash observed (seed 43, likely existing instability)

Notes:
- Ultra HEAP never compiled (#if HAKMEM_TINY_ULTRA_HEAP undefined)
- BG Remote variables never initialized (g_bg_remote_enable always 0)
- Ultra SLIM (ultra_slim_alloc_box.h) preserved (active 4-layer path)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-27 04:35:47 +09:00
+								// ultra_sll_cap_for_class moved earlier in file (before hakmem_tiny_free.inc)
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
 								static inline int ultra_validate_sll_head(int class_idx, void* head) {
 								    uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
 								    TinySlab* owner = registry_lookup(base);
 								    if (!owner) return 0;
 								    uintptr_t start = (uintptr_t)owner->base;
 								    if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
 								    return (owner->class_idx == class_idx);
 								}
 								// Optional: wrapper TLS guard（ラッパー再入検知をTLSカウンタで）
 								#ifndef HAKMEM_WRAPPER_TLS_GUARD
 								#define HAKMEM_WRAPPER_TLS_GUARD 0
 								#endif
 								#if HAKMEM_WRAPPER_TLS_GUARD
 								extern __thread int g_tls_in_wrapper;
 								#endif
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
-												Fix include order in hakmem.c - move hak_kpi_util.inc.h before hak_core_init.inc.h

Problem: hak_core_init.inc.h references KPI measurement variables
(g_latency_histogram, g_latency_samples, g_baseline_soft_pf, etc.)
but hakmem.c was including hak_kpi_util.inc.h AFTER hak_core_init.inc.h,
causing undefined reference errors.

Solution: Reorder includes so hak_kpi_util.inc.h (definition) comes
before hak_core_init.inc.h (usage).

Build result: ✅ Success (libhakmem.so 547KB, 0 errors)

Minor changes:
- Added extern __thread declarations for TLS SLL debug variables
- Added signal handler logging for debug_dump_last_push
- Improved hakmem_tiny.c structure for Phase 2 preparation

🤖 Generated with Claude Code + Task Agent

Co-Authored-By: Gemini <gemini@example.com>
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-03 13:28:44 +09:00
+								// ============================================================================
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
 								// Static function for draining TLS caches
 								//
 								// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
 								// Static function for memory diet mode application
 								//
 								// Phase 2D-3: Lifecycle management functions (226 lines total)
 								#include "hakmem_tiny_lifecycle.inc"
 								// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
 								#include "hakmem_tiny_slab_mgmt.inc"
-												Code Cleanup: Remove false positives, redundant validations, and reduce verbose logging

Following the C7 stride upgrade fix (commit 23c0d9541), this commit performs
comprehensive cleanup to improve code quality and reduce debug noise.

## Changes

### 1. Disable False Positive Checks (tiny_nextptr.h)
- **Disabled**: NXT_MISALIGN validation block with `#if 0`
- **Reason**: Produces false positives due to slab base offsets (2048, 65536)
  not being stride-aligned, causing all blocks to appear "misaligned"
- **TODO**: Reimplement to check stride DISTANCE between consecutive blocks
  instead of absolute alignment to stride boundaries

### 2. Remove Redundant Geometry Validations

**hakmem_tiny_refill_p0.inc.h (P0 batch refill)**
- Removed 25-line CARVE_GEOMETRY_FIX validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Stride table is now correct in tiny_block_stride_for_class(),
  defense-in-depth validation adds overhead without benefit

**ss_legacy_backend_box.c (legacy backend)**
- Removed 18-line LEGACY_FIX_GEOMETRY validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Shared_pool validates geometry at acquisition time

### 3. Reduce Verbose Logging

**hakmem_shared_pool.c (sp_fix_geometry_if_needed)**
- Made SP_FIX_GEOMETRY logging conditional on `!HAKMEM_BUILD_RELEASE`
- **Reason**: Geometry fixes are expected during stride upgrades,
  no need to log in release builds

### 4. Verification
- Build: ✅ Successful (LTO warnings expected)
- Test: ✅ 10K iterations (1.87M ops/s, no crashes)
- NXT_MISALIGN false positives: ✅ Eliminated

## Files Modified
- core/tiny_nextptr.h - Disabled false positive NXT_MISALIGN check
- core/hakmem_tiny_refill_p0.inc.h - Removed redundant CARVE validation
- core/box/ss_legacy_backend_box.c - Removed redundant LEGACY validation
- core/hakmem_shared_pool.c - Made SP_FIX_GEOMETRY logging debug-only

## Impact
- **Code clarity**: Removed 43 lines of redundant validation code
- **Debug noise**: Reduced false positive diagnostics
- **Performance**: Eliminated overhead from redundant geometry checks
- **Maintainability**: Single source of truth for geometry validation

🧹 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 23:00:24 +09:00
+								// Tiny Heap v2 stats dump (opt-in)
 								void tiny_heap_v2_print_stats(void) {
-												Priority-2 ENV Cache: hakmem_tiny.c (3箇所置換)

【置換ファイル】
- core/hakmem_tiny.c (3箇所 → ENV Cache)

【変更詳細】
1. tiny_heap_v2_print_stats():
   - getenv("HAKMEM_TINY_HEAP_V2_STATS") → HAK_ENV_TINY_HEAP_V2_STATS()

2. tiny_alloc_1024_diag_atexit():
   - getenv("HAKMEM_TINY_ALLOC_1024_METRIC") → HAK_ENV_TINY_ALLOC_1024_METRIC()

3. tiny_tls_sll_diag_atexit():
   - getenv("HAKMEM_TINY_SLL_DIAG") → HAK_ENV_TINY_SLL_DIAG()

- #include "hakmem_env_cache.h" 追加

【効果】
- 診断系atexit()関数からgetenv()呼び出しを排除
- 既存ENV変数を利用 (新規追加なし、カウント: 46変数維持)

【テスト】
✅ make shared → 成功
✅ /tmp/test_mixed3_final → PASSED

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-02 20:54:03 +09:00
+								    // Priority-2: Use cached ENV
 								    if (!HAK_ENV_TINY_HEAP_V2_STATS()) return;
-												Code Cleanup: Remove false positives, redundant validations, and reduce verbose logging

Following the C7 stride upgrade fix (commit 23c0d9541), this commit performs
comprehensive cleanup to improve code quality and reduce debug noise.

## Changes

### 1. Disable False Positive Checks (tiny_nextptr.h)
- **Disabled**: NXT_MISALIGN validation block with `#if 0`
- **Reason**: Produces false positives due to slab base offsets (2048, 65536)
  not being stride-aligned, causing all blocks to appear "misaligned"
- **TODO**: Reimplement to check stride DISTANCE between consecutive blocks
  instead of absolute alignment to stride boundaries

### 2. Remove Redundant Geometry Validations

**hakmem_tiny_refill_p0.inc.h (P0 batch refill)**
- Removed 25-line CARVE_GEOMETRY_FIX validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Stride table is now correct in tiny_block_stride_for_class(),
  defense-in-depth validation adds overhead without benefit

**ss_legacy_backend_box.c (legacy backend)**
- Removed 18-line LEGACY_FIX_GEOMETRY validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Shared_pool validates geometry at acquisition time

### 3. Reduce Verbose Logging

**hakmem_shared_pool.c (sp_fix_geometry_if_needed)**
- Made SP_FIX_GEOMETRY logging conditional on `!HAKMEM_BUILD_RELEASE`
- **Reason**: Geometry fixes are expected during stride upgrades,
  no need to log in release builds

### 4. Verification
- Build: ✅ Successful (LTO warnings expected)
- Test: ✅ 10K iterations (1.87M ops/s, no crashes)
- NXT_MISALIGN false positives: ✅ Eliminated

## Files Modified
- core/tiny_nextptr.h - Disabled false positive NXT_MISALIGN check
- core/hakmem_tiny_refill_p0.inc.h - Removed redundant CARVE validation
- core/box/ss_legacy_backend_box.c - Removed redundant LEGACY validation
- core/hakmem_shared_pool.c - Made SP_FIX_GEOMETRY logging debug-only

## Impact
- **Code clarity**: Removed 43 lines of redundant validation code
- **Debug noise**: Reduced false positive diagnostics
- **Performance**: Eliminated overhead from redundant geometry checks
- **Maintainability**: Single source of truth for geometry validation

🧹 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 23:00:24 +09:00
 								    fprintf(stderr, "\n[HeapV2] TLS magazine stats (per class, thread-local)\n");
 								    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
 								        TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[cls];
 								        TinyHeapV2Stats* st = &g_tiny_heap_v2_stats[cls];
 								        fprintf(stderr,
 								                "C%d: top=%d alloc_calls=%llu mag_hits=%llu refill_calls=%llu refill_blocks=%llu backend_oom=%llu\n",
 								                cls,
 								                mag->top,
 								                (unsigned long long)st->alloc_calls,
 								                (unsigned long long)st->mag_hits,
 								                (unsigned long long)st->refill_calls,
 								                (unsigned long long)st->refill_blocks,
 								                (unsigned long long)st->backend_oom);
 								    }
 								}
 								static void tiny_heap_v2_stats_atexit(void) __attribute__((destructor));
 								static void tiny_heap_v2_stats_atexit(void) {
 								    tiny_heap_v2_print_stats();
 								}
 								// Size→class routing for >=1024B (env: HAKMEM_TINY_ALLOC_1024_METRIC)
 								_Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES] = {0};
 								static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor));
 								static void tiny_alloc_1024_diag_atexit(void) {
-												Priority-2 ENV Cache: hakmem_tiny.c (3箇所置換)

【置換ファイル】
- core/hakmem_tiny.c (3箇所 → ENV Cache)

【変更詳細】
1. tiny_heap_v2_print_stats():
   - getenv("HAKMEM_TINY_HEAP_V2_STATS") → HAK_ENV_TINY_HEAP_V2_STATS()

2. tiny_alloc_1024_diag_atexit():
   - getenv("HAKMEM_TINY_ALLOC_1024_METRIC") → HAK_ENV_TINY_ALLOC_1024_METRIC()

3. tiny_tls_sll_diag_atexit():
   - getenv("HAKMEM_TINY_SLL_DIAG") → HAK_ENV_TINY_SLL_DIAG()

- #include "hakmem_env_cache.h" 追加

【効果】
- 診断系atexit()関数からgetenv()呼び出しを排除
- 既存ENV変数を利用 (新規追加なし、カウント: 46変数維持)

【テスト】
✅ make shared → 成功
✅ /tmp/test_mixed3_final → PASSED

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-02 20:54:03 +09:00
+								    // Priority-2: Use cached ENV
 								    if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return;
-												Code Cleanup: Remove false positives, redundant validations, and reduce verbose logging

Following the C7 stride upgrade fix (commit 23c0d9541), this commit performs
comprehensive cleanup to improve code quality and reduce debug noise.

## Changes

### 1. Disable False Positive Checks (tiny_nextptr.h)
- **Disabled**: NXT_MISALIGN validation block with `#if 0`
- **Reason**: Produces false positives due to slab base offsets (2048, 65536)
  not being stride-aligned, causing all blocks to appear "misaligned"
- **TODO**: Reimplement to check stride DISTANCE between consecutive blocks
  instead of absolute alignment to stride boundaries

### 2. Remove Redundant Geometry Validations

**hakmem_tiny_refill_p0.inc.h (P0 batch refill)**
- Removed 25-line CARVE_GEOMETRY_FIX validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Stride table is now correct in tiny_block_stride_for_class(),
  defense-in-depth validation adds overhead without benefit

**ss_legacy_backend_box.c (legacy backend)**
- Removed 18-line LEGACY_FIX_GEOMETRY validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Shared_pool validates geometry at acquisition time

### 3. Reduce Verbose Logging

**hakmem_shared_pool.c (sp_fix_geometry_if_needed)**
- Made SP_FIX_GEOMETRY logging conditional on `!HAKMEM_BUILD_RELEASE`
- **Reason**: Geometry fixes are expected during stride upgrades,
  no need to log in release builds

### 4. Verification
- Build: ✅ Successful (LTO warnings expected)
- Test: ✅ 10K iterations (1.87M ops/s, no crashes)
- NXT_MISALIGN false positives: ✅ Eliminated

## Files Modified
- core/tiny_nextptr.h - Disabled false positive NXT_MISALIGN check
- core/hakmem_tiny_refill_p0.inc.h - Removed redundant CARVE validation
- core/box/ss_legacy_backend_box.c - Removed redundant LEGACY validation
- core/hakmem_shared_pool.c - Made SP_FIX_GEOMETRY logging debug-only

## Impact
- **Code clarity**: Removed 43 lines of redundant validation code
- **Debug noise**: Reduced false positive diagnostics
- **Performance**: Eliminated overhead from redundant geometry checks
- **Maintainability**: Single source of truth for geometry validation

🧹 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 23:00:24 +09:00
+								    fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n");
 								    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
 								        uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed);
 								        if (v) {
 								            fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v);
 								        }
 								    }
 								    fprintf(stderr, "\n");
 								}
 								// TLS SLL pointer diagnostics (optional)
 								extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES];
 								extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES];
 								static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor));
 								static void tiny_tls_sll_diag_atexit(void) {
-												ENV Cleanup Step 18: Gate HAKMEM_TINY_SLL_DIAG

Gate the SLL diagnostics debug variable behind #if !HAKMEM_BUILD_RELEASE:
- HAKMEM_TINY_SLL_DIAG: Controls singly-linked list integrity diagnostics
- 5 call sites gated (2 already gated, 5 needed gating):

Files modified:
- core/box/tls_sll_box.h:117 (tls_sll_dump_tls_window)
- core/box/tls_sll_box.h:191 (tls_sll_diag_next)
- core/hakmem_tiny.c:629 (tiny_tls_sll_diag_atexit destructor)
- core/hakmem_tiny_superslab.c:142 (remote drain diag)
- core/tiny_superslab_free.inc.h:132 (header mismatch detector)

Already gated:
- core/box/free_local_box.c:38 (already gated at line 33)
- core/box/free_local_box.c:87 (already gated at line 82)

Performance: 30.9M ops/s (baseline maintained)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-28 04:39:20 +09:00
+								#if !HAKMEM_BUILD_RELEASE
-												Priority-2 ENV Cache: hakmem_tiny.c (3箇所置換)

【置換ファイル】
- core/hakmem_tiny.c (3箇所 → ENV Cache)

【変更詳細】
1. tiny_heap_v2_print_stats():
   - getenv("HAKMEM_TINY_HEAP_V2_STATS") → HAK_ENV_TINY_HEAP_V2_STATS()

2. tiny_alloc_1024_diag_atexit():
   - getenv("HAKMEM_TINY_ALLOC_1024_METRIC") → HAK_ENV_TINY_ALLOC_1024_METRIC()

3. tiny_tls_sll_diag_atexit():
   - getenv("HAKMEM_TINY_SLL_DIAG") → HAK_ENV_TINY_SLL_DIAG()

- #include "hakmem_env_cache.h" 追加

【効果】
- 診断系atexit()関数からgetenv()呼び出しを排除
- 既存ENV変数を利用 (新規追加なし、カウント: 46変数維持)

【テスト】
✅ make shared → 成功
✅ /tmp/test_mixed3_final → PASSED

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-02 20:54:03 +09:00
+								    // Priority-2: Use cached ENV
 								    if (!HAK_ENV_TINY_SLL_DIAG()) return;
-												Code Cleanup: Remove false positives, redundant validations, and reduce verbose logging

Following the C7 stride upgrade fix (commit 23c0d9541), this commit performs
comprehensive cleanup to improve code quality and reduce debug noise.

## Changes

### 1. Disable False Positive Checks (tiny_nextptr.h)
- **Disabled**: NXT_MISALIGN validation block with `#if 0`
- **Reason**: Produces false positives due to slab base offsets (2048, 65536)
  not being stride-aligned, causing all blocks to appear "misaligned"
- **TODO**: Reimplement to check stride DISTANCE between consecutive blocks
  instead of absolute alignment to stride boundaries

### 2. Remove Redundant Geometry Validations

**hakmem_tiny_refill_p0.inc.h (P0 batch refill)**
- Removed 25-line CARVE_GEOMETRY_FIX validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Stride table is now correct in tiny_block_stride_for_class(),
  defense-in-depth validation adds overhead without benefit

**ss_legacy_backend_box.c (legacy backend)**
- Removed 18-line LEGACY_FIX_GEOMETRY validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Shared_pool validates geometry at acquisition time

### 3. Reduce Verbose Logging

**hakmem_shared_pool.c (sp_fix_geometry_if_needed)**
- Made SP_FIX_GEOMETRY logging conditional on `!HAKMEM_BUILD_RELEASE`
- **Reason**: Geometry fixes are expected during stride upgrades,
  no need to log in release builds

### 4. Verification
- Build: ✅ Successful (LTO warnings expected)
- Test: ✅ 10K iterations (1.87M ops/s, no crashes)
- NXT_MISALIGN false positives: ✅ Eliminated

## Files Modified
- core/tiny_nextptr.h - Disabled false positive NXT_MISALIGN check
- core/hakmem_tiny_refill_p0.inc.h - Removed redundant CARVE validation
- core/box/ss_legacy_backend_box.c - Removed redundant LEGACY validation
- core/hakmem_shared_pool.c - Made SP_FIX_GEOMETRY logging debug-only

## Impact
- **Code clarity**: Removed 43 lines of redundant validation code
- **Debug noise**: Reduced false positive diagnostics
- **Performance**: Eliminated overhead from redundant geometry checks
- **Maintainability**: Single source of truth for geometry validation

🧹 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 23:00:24 +09:00
+								    fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n");
 								    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
 								        uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed);
 								        uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed);
 								        if (ih || ip) {
 								            fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n",
 								                    cls, (unsigned long long)ih, (unsigned long long)ip);
 								        }
 								    }
-												ENV Cleanup Step 18: Gate HAKMEM_TINY_SLL_DIAG

Gate the SLL diagnostics debug variable behind #if !HAKMEM_BUILD_RELEASE:
- HAKMEM_TINY_SLL_DIAG: Controls singly-linked list integrity diagnostics
- 5 call sites gated (2 already gated, 5 needed gating):

Files modified:
- core/box/tls_sll_box.h:117 (tls_sll_dump_tls_window)
- core/box/tls_sll_box.h:191 (tls_sll_diag_next)
- core/hakmem_tiny.c:629 (tiny_tls_sll_diag_atexit destructor)
- core/hakmem_tiny_superslab.c:142 (remote drain diag)
- core/tiny_superslab_free.inc.h:132 (header mismatch detector)

Already gated:
- core/box/free_local_box.c:38 (already gated at line 33)
- core/box/free_local_box.c:87 (already gated at line 82)

Performance: 30.9M ops/s (baseline maintained)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-28 04:39:20 +09:00
+								#endif
-												Code Cleanup: Remove false positives, redundant validations, and reduce verbose logging

Following the C7 stride upgrade fix (commit 23c0d9541), this commit performs
comprehensive cleanup to improve code quality and reduce debug noise.

## Changes

### 1. Disable False Positive Checks (tiny_nextptr.h)
- **Disabled**: NXT_MISALIGN validation block with `#if 0`
- **Reason**: Produces false positives due to slab base offsets (2048, 65536)
  not being stride-aligned, causing all blocks to appear "misaligned"
- **TODO**: Reimplement to check stride DISTANCE between consecutive blocks
  instead of absolute alignment to stride boundaries

### 2. Remove Redundant Geometry Validations

**hakmem_tiny_refill_p0.inc.h (P0 batch refill)**
- Removed 25-line CARVE_GEOMETRY_FIX validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Stride table is now correct in tiny_block_stride_for_class(),
  defense-in-depth validation adds overhead without benefit

**ss_legacy_backend_box.c (legacy backend)**
- Removed 18-line LEGACY_FIX_GEOMETRY validation block
- Replaced with NOTE explaining redundancy
- **Reason**: Shared_pool validates geometry at acquisition time

### 3. Reduce Verbose Logging

**hakmem_shared_pool.c (sp_fix_geometry_if_needed)**
- Made SP_FIX_GEOMETRY logging conditional on `!HAKMEM_BUILD_RELEASE`
- **Reason**: Geometry fixes are expected during stride upgrades,
  no need to log in release builds

### 4. Verification
- Build: ✅ Successful (LTO warnings expected)
- Test: ✅ 10K iterations (1.87M ops/s, no crashes)
- NXT_MISALIGN false positives: ✅ Eliminated

## Files Modified
- core/tiny_nextptr.h - Disabled false positive NXT_MISALIGN check
- core/hakmem_tiny_refill_p0.inc.h - Removed redundant CARVE validation
- core/box/ss_legacy_backend_box.c - Removed redundant LEGACY validation
- core/hakmem_shared_pool.c - Made SP_FIX_GEOMETRY logging debug-only

## Impact
- **Code clarity**: Removed 43 lines of redundant validation code
- **Debug noise**: Reduced false positive diagnostics
- **Performance**: Eliminated overhead from redundant geometry checks
- **Maintainability**: Single source of truth for geometry validation

🧹 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 23:00:24 +09:00
+								}
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
-												Performance Measurement Framework: Unified Cache, TLS SLL, Shared Pool Analysis

## Summary

Implemented production-grade measurement infrastructure to quantify top 3 bottlenecks:
- Unified cache hit/miss rates + refill cost
- TLS SLL usage patterns
- Shared pool lock contention distribution

## Changes

### 1. Unified Cache Metrics (tiny_unified_cache.h/c)
- Added atomic counters:
  - g_unified_cache_hits_global: successful cache pops
  - g_unified_cache_misses_global: refill triggers
  - g_unified_cache_refill_cycles_global: refill cost in CPU cycles (rdtsc)
- Instrumented `unified_cache_pop_or_refill()` to count hits
- Instrumented `unified_cache_refill()` with cycle measurement
- ENV-gated: HAKMEM_MEASURE_UNIFIED_CACHE=1 (default: off)
- Added unified_cache_print_measurements() output function

### 2. TLS SLL Metrics (tls_sll_box.h)
- Added atomic counters:
  - g_tls_sll_push_count_global: total pushes
  - g_tls_sll_pop_count_global: successful pops
  - g_tls_sll_pop_empty_count_global: empty list conditions
- Instrumented push/pop paths
- Added tls_sll_print_measurements() output function

### 3. Shared Pool Contention (hakmem_shared_pool_acquire.c)
- Added atomic counters:
  - g_sp_stage2_lock_acquired_global: Stage 2 locks
  - g_sp_stage3_lock_acquired_global: Stage 3 allocations
  - g_sp_alloc_lock_contention_global: total lock acquisitions
- Instrumented all pthread_mutex_lock calls in hot paths
- Added shared_pool_print_measurements() output function

### 4. Benchmark Integration (bench_random_mixed.c)
- Called all 3 print functions after benchmark loop
- Functions active only when HAKMEM_MEASURE_UNIFIED_CACHE=1 set

## Design Principles

- **Zero overhead when disabled**: Inline checks with __builtin_expect hints
- **Atomic relaxed memory order**: Minimal synchronization overhead
- **ENV-gated**: Single flag controls all measurements
- **Production-safe**: Compiles in release builds, no functional changes

## Usage

```bash
HAKMEM_MEASURE_UNIFIED_CACHE=1 ./bench_allocators_hakmem bench_random_mixed_hakmem 1000000 256 42
```

Output (when enabled):
```
========================================
Unified Cache Statistics
========================================
Hits:        1234567
Misses:      56789
Hit Rate:    95.6%
Avg Refill Cycles: 1234

========================================
TLS SLL Statistics
========================================
Total Pushes:     1234567
Total Pops:       345678
Pop Empty Count:  12345
Hit Rate:         98.8%

========================================
Shared Pool Contention Statistics
========================================
Stage 2 Locks:    123456 (33%)
Stage 3 Locks:    234567 (67%)
Total Contention: 357 locks per 1M ops
```

## Next Steps

1. **Enable measurements** and run benchmarks to gather data
2. **Analyze miss rates**: Which bottleneck dominates?
3. **Profile hottest stage**: Focus optimization on top contributor
4. Possible targets:
   - Increase unified cache capacity if miss rate >5%
   - Profile if TLS SLL is unused (potential legacy code removal)
   - Analyze if Stage 2 lock can be replaced with CAS

## Makefile Updates

Added core/box/tiny_route_box.o to:
- OBJS_BASE (test build)
- SHARED_OBJS (shared library)
- BENCH_HAKMEM_OBJS_BASE (benchmark)
- TINY_BENCH_OBJS_BASE (tiny benchmark)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 18:26:39 +09:00
+								// ============================================================================
 								// Performance Measurement: TLS SLL Statistics Print Function
 								// ============================================================================
 								void tls_sll_print_measurements(void) {
 								    // Check if measurement is enabled
 								    static int g_measure = -1;
 								    if (g_measure == -1) {
 								        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
 								        g_measure = (e && *e && *e != '0') ? 1 : 0;
 								    }
 								    if (!g_measure) {
 								        return;  // Measurement disabled
 								    }
 								    uint64_t pushes = atomic_load_explicit(&g_tls_sll_push_count_global, memory_order_relaxed);
 								    uint64_t pops = atomic_load_explicit(&g_tls_sll_pop_count_global, memory_order_relaxed);
 								    uint64_t pop_empty = atomic_load_explicit(&g_tls_sll_pop_empty_count_global, memory_order_relaxed);
 								    uint64_t total_pop_attempts = pops + pop_empty;
 								    if (total_pop_attempts == 0 && pushes == 0) {
 								        fprintf(stderr, "\n========================================\n");
 								        fprintf(stderr, "TLS SLL Statistics\n");
 								        fprintf(stderr, "========================================\n");
 								        fprintf(stderr, "No operations recorded\n");
 								        fprintf(stderr, "========================================\n\n");
 								        return;
 								    }
 								    double hit_rate = total_pop_attempts > 0 ? (100.0 * pops) / total_pop_attempts : 0.0;
 								    double empty_rate = total_pop_attempts > 0 ? (100.0 * pop_empty) / total_pop_attempts : 0.0;
 								    fprintf(stderr, "\n========================================\n");
 								    fprintf(stderr, "TLS SLL Statistics\n");
 								    fprintf(stderr, "========================================\n");
 								    fprintf(stderr, "Total Pushes:     %llu\n", (unsigned long long)pushes);
 								    fprintf(stderr, "Total Pops:       %llu\n", (unsigned long long)pops);
 								    fprintf(stderr, "Pop Empty Count:  %llu (%.1f%% of pops)\n",
 								            (unsigned long long)pop_empty, empty_rate);
 								    fprintf(stderr, "Hit Rate:         %.1f%%\n", hit_rate);
 								    fprintf(stderr, "========================================\n\n");
 								}
 								// ============================================================================
-												Refactor: Extract 5 Box modules from hakmem_tiny.c (-52% size reduction)

Split hakmem_tiny.c (2081 lines) into focused modules for better maintainability.

## Changes

**hakmem_tiny.c**: 2081 → 995 lines (-1086 lines, -52% reduction)

## Extracted Modules (5 boxes)

1. **config_box** (211 lines)
   - Size class tables, integrity counters
   - Debug flags, benchmark macros
   - HAK_RET_ALLOC/HAK_STAT_FREE instrumentation

2. **publish_box** (419 lines)
   - Publish/Adopt counters and statistics
   - Bench mailbox, partial ring
   - Live cap/Hot slot management
   - TLS helper functions (tiny_tls_default_*)

3. **globals_box** (256 lines)
   - Global variable declarations (~70 variables)
   - TinyPool instance and initialization flag
   - TLS variables (g_tls_lists, g_fast_head, g_fast_count)
   - SuperSlab configuration (partial ring, empty reserves)
   - Adopt gate functions

4. **phase6_wrappers_box** (122 lines)
   - Phase 6 Box Theory wrapper layer
   - hak_tiny_alloc_fast_wrapper()
   - hak_tiny_free_fast_wrapper()
   - Diagnostic instrumentation

5. **ace_guard_box** (100 lines)
   - ACE Learning Layer (hkm_ace_set_drain_threshold)
   - FastCache API (tiny_fc_room, tiny_fc_push_bulk)
   - Tiny Guard debugging system (5 functions)

## Benefits

- **Readability**: Giant 2k file → focused 1k core + 5 coherent modules
- **Maintainability**: Each box has clear responsibility and boundaries
- **Build**: All modules compile successfully ✅

## Technical Details

- Phase 1: ChatGPT extracted config_box + publish_box (-625 lines)
- Phase 2-4: Claude extracted globals_box + phase6_wrappers_box + ace_guard_box (-461 lines)
- All extractions use .inc files (same translation unit, preserves static/TLS linkage)
- Fixed Makefile: Added tiny_sizeclass_hist_box.o to OBJS_BASE and BENCH_HAKMEM_OBJS_BASE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-21 01:16:45 +09:00
+								// ACE Learning Layer & Tiny Guard - EXTRACTED to hakmem_tiny_ace_guard_box.inc
-												Performance Measurement Framework: Unified Cache, TLS SLL, Shared Pool Analysis

## Summary

Implemented production-grade measurement infrastructure to quantify top 3 bottlenecks:
- Unified cache hit/miss rates + refill cost
- TLS SLL usage patterns
- Shared pool lock contention distribution

## Changes

### 1. Unified Cache Metrics (tiny_unified_cache.h/c)
- Added atomic counters:
  - g_unified_cache_hits_global: successful cache pops
  - g_unified_cache_misses_global: refill triggers
  - g_unified_cache_refill_cycles_global: refill cost in CPU cycles (rdtsc)
- Instrumented `unified_cache_pop_or_refill()` to count hits
- Instrumented `unified_cache_refill()` with cycle measurement
- ENV-gated: HAKMEM_MEASURE_UNIFIED_CACHE=1 (default: off)
- Added unified_cache_print_measurements() output function

### 2. TLS SLL Metrics (tls_sll_box.h)
- Added atomic counters:
  - g_tls_sll_push_count_global: total pushes
  - g_tls_sll_pop_count_global: successful pops
  - g_tls_sll_pop_empty_count_global: empty list conditions
- Instrumented push/pop paths
- Added tls_sll_print_measurements() output function

### 3. Shared Pool Contention (hakmem_shared_pool_acquire.c)
- Added atomic counters:
  - g_sp_stage2_lock_acquired_global: Stage 2 locks
  - g_sp_stage3_lock_acquired_global: Stage 3 allocations
  - g_sp_alloc_lock_contention_global: total lock acquisitions
- Instrumented all pthread_mutex_lock calls in hot paths
- Added shared_pool_print_measurements() output function

### 4. Benchmark Integration (bench_random_mixed.c)
- Called all 3 print functions after benchmark loop
- Functions active only when HAKMEM_MEASURE_UNIFIED_CACHE=1 set

## Design Principles

- **Zero overhead when disabled**: Inline checks with __builtin_expect hints
- **Atomic relaxed memory order**: Minimal synchronization overhead
- **ENV-gated**: Single flag controls all measurements
- **Production-safe**: Compiles in release builds, no functional changes

## Usage

```bash
HAKMEM_MEASURE_UNIFIED_CACHE=1 ./bench_allocators_hakmem bench_random_mixed_hakmem 1000000 256 42
```

Output (when enabled):
```
========================================
Unified Cache Statistics
========================================
Hits:        1234567
Misses:      56789
Hit Rate:    95.6%
Avg Refill Cycles: 1234

========================================
TLS SLL Statistics
========================================
Total Pushes:     1234567
Total Pops:       345678
Pop Empty Count:  12345
Hit Rate:         98.8%

========================================
Shared Pool Contention Statistics
========================================
Stage 2 Locks:    123456 (33%)
Stage 3 Locks:    234567 (67%)
Total Contention: 357 locks per 1M ops
```

## Next Steps

1. **Enable measurements** and run benchmarks to gather data
2. **Analyze miss rates**: Which bottleneck dominates?
3. **Profile hottest stage**: Focus optimization on top contributor
4. Possible targets:
   - Increase unified cache capacity if miss rate >5%
   - Profile if TLS SLL is unused (potential legacy code removal)
   - Analyze if Stage 2 lock can be replaced with CAS

## Makefile Updates

Added core/box/tiny_route_box.o to:
- OBJS_BASE (test build)
- SHARED_OBJS (shared library)
- BENCH_HAKMEM_OBJS_BASE (benchmark)
- TINY_BENCH_OBJS_BASE (tiny benchmark)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 18:26:39 +09:00
+								// ============================================================================
-												Implement Phantom typing for Tiny FastCache layer

Refactor FastCache and TLS cache APIs to use Phantom types (hak_base_ptr_t)
for compile-time type safety, preventing BASE/USER pointer confusion.

Changes:
1. core/hakmem_tiny_fastcache.inc.h:
   - fastcache_pop() returns hak_base_ptr_t instead of void*
   - fastcache_push() accepts hak_base_ptr_t instead of void*

2. core/hakmem_tiny.c:
   - Updated forward declarations to match new signatures

3. core/tiny_alloc_fast.inc.h, core/hakmem_tiny_alloc.inc:
   - Alloc paths now use hak_base_ptr_t for cache operations
   - BASE->USER conversion via HAK_RET_ALLOC macro

4. core/hakmem_tiny_refill.inc.h, core/refill/ss_refill_fc.h:
   - Refill paths properly handle BASE pointer types
   - Fixed: Removed unnecessary HAK_BASE_FROM_RAW() in ss_refill_fc.h line 176

5. core/hakmem_tiny_free.inc, core/tiny_free_magazine.inc.h:
   - Free paths convert USER->BASE before cache push
   - USER->BASE conversion via HAK_USER_TO_BASE or ptr_user_to_base()

6. core/hakmem_tiny_legacy_slow_box.inc:
   - Legacy path properly wraps pointers for cache API

Benefits:
- Type safety at compile time (in debug builds)
- Zero runtime overhead (debug builds only, release builds use typedef=void*)
- All BASE->USER conversions verified via Task analysis
- Prevents pointer type confusion bugs

Testing:
- Build: SUCCESS (all 9 files)
- Smoke test: PASS (sh8bench runs to completion)
- Conversion path verification: 3/3 paths correct

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-12-04 11:05:06 +09:00
+								#include "hakmem_tiny_ace_guard_box.inc"