// File: hakmem/core/box/ss_allocation_box.c
// Box: Core Allocation
// Purpose: SuperSlab allocation/deallocation (Box-based front-end)
#include "ss_allocation_box.h"
#include "ss_os_acquire_box.h"
#include "ss_cache_box.h"
#include "ss_stats_box.h"
#include "ss_ace_box.h"
#include "hakmem_super_registry.h"
#include "ss_addr_map_box.h"
#include "hakmem_tiny_config.h"
#include "hakmem_policy.h" // Phase E3-1: Access FrozenPolicy for never-free policy
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>
// Global statistics (defined in ss_stats_box.c, declared here for access)
extern pthread_mutex_t g_superslab_lock;
extern uint64_t g_superslabs_freed;
extern uint64_t g_bytes_allocated;
// g_ss_force_lg is defined in ss_ace_box.c but needs external linkage
extern int g_ss_force_lg;
// g_ss_populate_once controls MAP_POPULATE flag (defined in superslab_ace.c)
extern _Atomic int g_ss_populate_once;
// ============================================================================
// SuperSlab Allocation (ACE-Aware)
// ============================================================================
SuperSlab* superslab_allocate(uint8_t size_class) {
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → fail 1 in N allocations
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
static __thread unsigned long fault_tick = 0;
if (__builtin_expect(fault_rate == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
if (e && *e) {
int v = atoi(e);
if (v < 0) v = 0;
fault_rate = v;
} else {
fault_rate = 0;
}
}
if (fault_rate > 0) {
unsigned long t = ++fault_tick;
if ((t % (unsigned long)fault_rate) == 0ul) {
return NULL; // simulate OOM
}
}
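// The 1/N fault-injection cadence above can be seen in a standalone sketch
// (illustration only, not part of the allocator; fault_should_fail is a
// hypothetical helper mirroring the modulo logic):
//
// ```c
// #include <assert.h>
//
// /* With rate N, every N-th call "fails", mimicking an OOM from the OS. */
// static int fault_should_fail(unsigned long *tick, int rate) {
//     if (rate <= 0) return 0;               /* 0 or unset: injection disabled */
//     unsigned long t = ++*tick;
//     return (t % (unsigned long)rate) == 0ul;
// }
//
// int main(void) {
//     unsigned long tick = 0;
//     int failures = 0;
//     for (int i = 0; i < 100; i++)
//         failures += fault_should_fail(&tick, 4); /* rate=4 → every 4th call */
//     assert(failures == 25);
//     return 0;
// }
// ```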
// Optional env clamp for SuperSlab size
static int env_parsed = 0;
// Allow the full ACE range [MIN..MAX] by default so the two-way 1MB/2MB learning stays effective.
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
if (!env_parsed) {
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
if (maxmb) {
int m = atoi(maxmb);
if (m == 1) g_ss_max_lg_env = 20;
else if (m == 2) g_ss_max_lg_env = 21;
}
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
if (minmb) {
int m = atoi(minmb);
if (m == 1) g_ss_min_lg_env = 20;
else if (m == 2) g_ss_min_lg_env = 21;
}
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
if (force_lg_env && *force_lg_env) {
int v = atoi(force_lg_env);
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
g_ss_force_lg = v;
g_ss_min_lg_env = g_ss_max_lg_env = v;
}
}
size_t precharge_default = 0;
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
if (precharge_env && *precharge_env) {
long v = atol(precharge_env);
if (v < 0) v = 0;
precharge_default = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
size_t cache_default = 0;
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
if (cache_env && *cache_env) {
long v = atol(cache_env);
if (v < 0) v = 0;
cache_default = (size_t)v;
}
// Initialize cache/precharge defaults directly (the box API has no init function)
extern size_t g_ss_cache_cap[8];
extern size_t g_ss_precharge_target[8];
for (int i = 0; i < 8; i++) {
g_ss_cache_cap[i] = cache_default;
g_ss_precharge_target[i] = precharge_default;
}
for (int i = 0; i < 8; i++) {
char name[64];
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
char* cap_env = getenv(name);
if (cap_env && *cap_env) {
long v = atol(cap_env);
if (v < 0) v = 0;
tiny_ss_cache_set_class_cap(i, (size_t)v);
}
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
char* pre_env = getenv(name);
if (pre_env && *pre_env) {
long v = atol(pre_env);
if (v < 0) v = 0;
tiny_ss_precharge_set_class_target(i, (size_t)v);
}
}
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
if (populate_env && atoi(populate_env) != 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
env_parsed = 1;
}
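// The per-class overrides parsed above follow a simple ENV naming scheme:
// HAKMEM_TINY_SS_CACHE_C<i> and HAKMEM_TINY_SS_PRECHARGE_C<i> for class i in
// [0,8). A standalone sketch of the name construction (illustration only):
//
// ```c
// #include <assert.h>
// #include <stdio.h>
// #include <string.h>
//
// int main(void) {
//     char name[64];
//     snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", 3);
//     assert(strcmp(name, "HAKMEM_TINY_SS_CACHE_C3") == 0);
//     snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", 7);
//     assert(strcmp(name, "HAKMEM_TINY_SS_PRECHARGE_C7") == 0);
//     return 0;
// }
// ```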
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
uintptr_t ss_mask = ss_size - 1;
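// Since ss_size is a power of two, ss_mask doubles as an alignment mask: any
// interior pointer can be mapped back to its SuperSlab base with `& ~ss_mask`.
// A standalone sketch checking this invariant (hypothetical base address):
//
// ```c
// #include <assert.h>
// #include <stdint.h>
// #include <stddef.h>
//
// int main(void) {
//     for (uint8_t lg = 20; lg <= 21; lg++) {       /* 20=1MB, 21=2MB */
//         size_t ss_size = (size_t)1 << lg;
//         uintptr_t ss_mask = ss_size - 1;
//         assert((ss_size & ss_mask) == 0);          /* power of two */
//         uintptr_t base = (uintptr_t)1 << 30;       /* ss_size-aligned base */
//         uintptr_t inner = base + ss_size / 2;      /* pointer inside slab */
//         assert((inner & ~ss_mask) == base);        /* recovers the base */
//     }
//     return 0;
// }
// ```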
int from_cache = 0;
void* ptr = NULL;
// Debug logging flag (lazy init)
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
// Phase 9: Try LRU cache first (lazy deallocation)
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
if (cached_ss) {
ptr = (void*)cached_ss;
from_cache = 1;
// Debug logging for REFILL from LRU
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
size_class, (void*)cached_ss);
}
// Skip old cache path - LRU cache takes priority
} else {
// Fallback to old cache (will be deprecated)
ss_cache_precharge(size_class, ss_size, ss_mask);
void* old_cached = ss_cache_pop(size_class);
if (old_cached) {
ptr = old_cached;
from_cache = 1;
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
size_class, ptr);
}
}
}
if (!ptr) {
int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
if (!ptr) {
return NULL;
}
// Debug logging for REFILL with new allocation
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
size_class, (void*)ptr);
}
}
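// The acquisition order above is a three-tier fallback: LRU cache, then the
// legacy prewarm cache, then a fresh OS acquisition. A standalone sketch with
// hypothetical stub functions (illustration of the control flow only):
//
// ```c
// #include <assert.h>
// #include <stddef.h>
//
// static void* lru_pop(int hit)   { return hit ? (void*)0x1000 : NULL; }
// static void* cache_pop(int hit) { return hit ? (void*)0x2000 : NULL; }
// static void* os_acquire(void)   { return (void*)0x3000; }
//
// static void* refill(int lru_hit, int cache_hit, int *from_cache) {
//     void* p = lru_pop(lru_hit);            /* tier 1: LRU cache */
//     if (!p) p = cache_pop(cache_hit);      /* tier 2: prewarm cache */
//     *from_cache = (p != NULL);             /* both count as cache reuse */
//     if (!p) p = os_acquire();              /* tier 3: fresh mapping */
//     return p;
// }
//
// int main(void) {
//     int fc;
//     assert(refill(1, 0, &fc) == (void*)0x1000 && fc == 1);
//     assert(refill(0, 1, &fc) == (void*)0x2000 && fc == 1);
//     assert(refill(0, 0, &fc) == (void*)0x3000 && fc == 0);
//     return 0;
// }
// ```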
// Initialize SuperSlab header (Phase 12: no global size_class field)
SuperSlab* ss = (SuperSlab*)ptr;
ss->magic = SUPERSLAB_MAGIC;
ss->active_slabs = 0;
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
ss->slab_bitmap = 0;
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
ss->freelist_mask = 0; // P1.1 FIX: Initialize freelist_mask
ss->empty_mask = 0; // P1.1 FIX: Initialize empty_mask
ss->empty_count = 0; // P1.1 FIX: Initialize empty_count
ss->partial_epoch = 0;
ss->publish_hint = 0xFF;
// Initialize atomics explicitly
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
ss->partial_next = NULL;
// Phase 9: Initialize LRU fields
ss->last_used_ns = 0;
ss->generation = 0;
ss->lru_prev = NULL;
ss->lru_next = NULL;
// Phase 3d-C: Initialize hot/cold fields
ss->hot_count = 0;
ss->cold_count = 0;
memset(ss->hot_indices, 0, sizeof(ss->hot_indices));
memset(ss->cold_indices, 0, sizeof(ss->cold_indices));
// Phase 12: Initialize next_chunk (legacy per-class chain)
ss->next_chunk = NULL;
// Initialize all slab metadata (only up to max slabs for this size).
// NOTE: Detailed slab initialization and remote-queue draining are
// consolidated in superslab_slab.c (the Slab Management Box).
int max_slabs = (int)(ss_size / SLAB_SIZE);
// DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
// This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
// Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
// P1.1: Initialize class_map to UNASSIGNED (255) for all slabs
// This ensures class_map is in a known state even before slabs are assigned
memset(ss->class_map, 255, max_slabs * sizeof(uint8_t));
// P0 Optimization: Initialize shared_meta pointer (used for O(1) metadata lookup)
ss->shared_meta = NULL;
if (from_cache) {
ss_stats_cache_reuse();
}
// Phase 8.3: Update ACE current_lg to match allocated size
g_ss_ace[size_class].current_lg = lg;
// Phase 1: Register SuperSlab in global registry for fast lookup
// CRITICAL: Register AFTER full initialization (ss structure is ready)
uintptr_t base = (uintptr_t)ss;
int reg_ok = hak_super_register(base, ss);
if (!reg_ok) {
// Registry full - this is a fatal error
        fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", (void*)ss);
// Still return ss to avoid memory leak, but lookups may fail
}
do {
static _Atomic uint32_t g_ss_reg_log_shot = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_ss_reg_log_shot, 1, memory_order_relaxed);
if (shot < 4) {
fprintf(stderr,
"[SS_REG_DEBUG] class=%u ss=%p reg_ok=%d map_count=%zu\n",
(unsigned)size_class,
(void*)ss,
reg_ok,
g_ss_addr_map.count);
fflush(stderr);
}
} while (0);
return ss;
}
// ============================================================================
// SuperSlab Deallocation
// ============================================================================
void superslab_free(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
return; // Invalid SuperSlab
}
// Guard: do not free while pinned by TLS/remote holders
uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
if (__builtin_expect(ss_refs != 0, 0)) {
#if !HAKMEM_BUILD_RELEASE
static _Atomic uint32_t g_ss_free_pinned = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
}
#endif
return;
}
    // Optional debug logging (gated by HAKMEM_SS_FREE_DEBUG in non-release builds)
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
(void*)ss, ss->lg_size, ss->active_slabs);
}
// Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
size_t ss_size = (size_t)1 << ss->lg_size;
// Phase 1: Unregister SuperSlab from registry FIRST
// CRITICAL: Must unregister BEFORE adding to LRU cache
// Reason: Cached SuperSlabs should NOT be found by lookups
uintptr_t base = (uintptr_t)ss;
hak_super_unregister(base);
// Memory fence to ensure unregister is visible
atomic_thread_fence(memory_order_release);
// Phase 9: Try LRU cache first (lazy deallocation)
// NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
// Magic will be cleared on eviction or reuse
int lru_cached = hak_ss_lru_push(ss);
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
}
if (lru_cached) {
// Successfully cached in LRU - defer munmap
return;
}
    // LRU cache full or disabled - fall back to the legacy cache
    // (per-SS class_idx is no longer tracked on the header, so pass class 0)
    int old_cached = ss_cache_push(0, ss);
if (old_cached) {
ss_stats_cache_store();
return;
}
// Phase E3-1: Check never-free policy before munmap (DISABLED - policy field not yet implemented)
// If policy forbids Tiny SuperSlab munmap, skip deallocation (leak is intentional)
// TODO: Add tiny_ss_never_free_global field to FrozenPolicy when implementing Phase E3-1
#if 0
const FrozenPolicy* pol = hkm_policy_get();
if (pol && pol->tiny_ss_never_free_global) {
// Policy forbids munmap - keep SuperSlab allocated (intentional "leak")
// Watermark enforcement will be added in Phase E3-2
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[SS_POLICY_SKIP] Skipping munmap (never_free policy) ss=%p size=%zu\n",
(void*)ss, ss_size);
#endif
return;
}
#endif
// Both caches full - immediately free to OS (eager deallocation)
// Clear magic to prevent use-after-free
ss->magic = 0;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
(void*)ss, ss_size,
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif
munmap(ss, ss_size);
// Update statistics for actual release to OS
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_freed++;
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
g_bytes_allocated -= ss_size;
pthread_mutex_unlock(&g_superslab_lock);
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
(unsigned long long)g_superslabs_freed);
#endif
}
// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================
// Note: superslab_init_slab() is implemented in superslab_slab.c (Slab Management Box)
// and is intentionally not exported from this Box.