hakmem/core/hakmem_tiny_background.inc

// Background Refill Bin (per-class lock-free SLL) — fills in background so the
// front path only does a single CAS pop when both slots/bump are empty.
static int g_bg_bin_enable = 0;                // ENV toggle removed (fixed OFF)
static int g_bg_bin_target = 128;              // Fixed target (legacy default)
static _Atomic uintptr_t g_bg_bin_head[TINY_NUM_CLASSES];
static pthread_t g_bg_bin_thread;
static volatile int g_bg_bin_stop = 0;
static int g_bg_bin_started = 0;
// Inline helpers
#include "hakmem_tiny_bg_bin.inc.h"

// ============================================================================
// EXTRACTED TO hakmem_tiny_remote_target.c (Phase 2C-1)
// ============================================================================
// Targeted remote-drain queue moved to separate module
// Functions: remote_target_enqueue(), remote_target_pop()
// Variables: g_bg_remote_enable, g_remote_target_head, g_remote_target_len, g_bg_remote_batch

// ============================================================================
// EXTRACTED TO hakmem_tiny_bg_spill.c/.h (Phase 2C-2)
// ============================================================================
// Background spill/drain queue for SuperSlab freelist returns
// Functions: bg_spill_push_one(), bg_spill_push_chain(), bg_spill_drain_class(), bg_spill_init()
// Variables: g_bg_spill_enable, g_bg_spill_target, g_bg_spill_max_batch, g_bg_spill_head[], g_bg_spill_len[]


static void* tiny_bg_refill_main(void* arg) {
    (void)arg;
    const int sleep_us = 1000; // 1ms
    while (!g_bg_bin_stop) {
        if (!g_bg_bin_enable) { usleep(sleep_us); continue; }
        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
            // まずは小クラスだけ対象（シンプルに）
            if (!is_hot_class(k)) continue;
            int have = bgbin_length_approx(k, g_bg_bin_target);
            if (have >= g_bg_bin_target) continue;
            int need = g_bg_bin_target - have;

            // 生成チェーンを作る（free listやbitmapから、裏で重い処理OK）
            void* chain_head = NULL; void* chain_tail = NULL; int built = 0;
            pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
            pthread_mutex_lock(lock);
            TinySlab* slab = g_tiny_pool.free_slabs[k];
            // Adopt first slab with free blocks; if none, allocate one
            if (!slab) slab = allocate_new_slab(k);
            while (need > 0 && slab) {
                if (slab->free_count == 0) { slab = slab->next; continue; }
                int idx = hak_tiny_find_free_block(slab);
                if (idx < 0) { slab = slab->next; continue; }
                hak_tiny_set_used(slab, idx);
                slab->free_count--;
                size_t bs = g_tiny_class_sizes[k];
                void* p = (char*)slab->base + (idx * bs);
                // prepend to local chain
                tiny_next_write(k, p, chain_head);  // Box API: next pointer write
                chain_head = p;
                if (!chain_tail) chain_tail = p;
                built++; need--;
            }
            pthread_mutex_unlock(lock);

            if (built > 0) {
                bgbin_push_chain(k, chain_head, chain_tail);
            }
        }
        // Drain background spill queues (SuperSlab freelist return)
        // EXTRACTED: Drain logic moved to hakmem_tiny_bg_spill.c (Phase 2C-2)
        if (g_bg_spill_enable) {
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
                bg_spill_drain_class(k, lock);
            }
        }
        // Drain remote frees: targeted by per-class queue (avoid scanning all slabs)
        if (g_bg_remote_enable) {
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                int processed = 0;
                while (processed < g_bg_remote_batch) {
                    TinySlab* s = remote_target_pop(k);
                    if (!s) break;
                    pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
                    pthread_mutex_lock(lock);
                    tiny_remote_drain_locked(s);
                    pthread_mutex_unlock(lock);
                    processed++;
                    // If more remain (due to concurrent pushes), the slab may be re-enqueued
                    // by producers when threshold is hit again.
                }
            }
        }
        usleep(sleep_us);
    }
    return NULL;
}

static inline void eventq_push(int class_idx, uint32_t size) {
    eventq_push_ex(class_idx, size, HAK_TIER_FRONT, 0, 0, 0);
}

static void* intelligence_engine_main(void* arg) {
    (void)arg;
    const int sleep_us = 100000; // 100ms
    int hist[TINY_NUM_CLASSES] = {0};
    int cnt[TINY_NUM_CLASSES] = {0};
    // Tiny の学習は既定でOFF（実アプリは後段で学習）：
    // HAKMEM_INT_ADAPT_REFILL=1 / HAKMEM_INT_ADAPT_CAPS=1 を明示設定した場合のみON
    int adapt_refill = 0;  // default OFF for Tiny
    int adapt_caps = 0;    // default OFF for Tiny (env can enable)
    char* arf = getenv("HAKMEM_INT_ADAPT_REFILL");
    if (arf) adapt_refill = (atoi(arf) != 0);
    char* acp = getenv("HAKMEM_INT_ADAPT_CAPS");
    if (acp) adapt_caps = (atoi(acp) != 0);
    const int REFILL_MIN = 32, REFILL_MAX = 256;
    const int REFILL_HOT_MIN = 96, REFILL_HOT_MAX = 320;
    // Tiny diet (memory-tight) knobs
    {
        char* rb = getenv("HAKMEM_TINY_RSS_BUDGET_KB");
        if (rb) { int v = atoi(rb); if (v > 0) g_tiny_rss_budget_kb = v; }
        char* st = getenv("HAKMEM_TINY_DIET_STEP");
        if (st) { int v = atoi(st); if (v > 0 && v < 256) g_tiny_diet_step = v; }
        char* tt = getenv("HAKMEM_TINY_INT_TIGHT");
        if (tt) g_tiny_int_tight = (atoi(tt) != 0);
        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
            char var[64]; snprintf(var, sizeof(var), "HAKMEM_TINY_CAP_FLOOR_C%d", k);
            char* vf = getenv(var);
            if (vf) { int v = atoi(vf); if (v > 0 && v < TINY_TLS_MAG_CAP) g_tiny_cap_floor[k] = v; }
        }
    }
    // Idle trim knob
    int idle_trim_ms = 0;
    int idle_flush = 0; // flush magazines on idle tick (optional)
    {
        char* it = getenv("HAKMEM_TINY_IDLE_TRIM_MS");
        if (it) { int v = atoi(it); if (v > 0) idle_trim_ms = v; }
        char* iff = getenv("HAKMEM_TINY_IDLE_FLUSH");
        if (iff) idle_flush = (atoi(iff) != 0);
    }
    int idle_trim_ticks = (idle_trim_ms > 0) ? (idle_trim_ms * 1000 / sleep_us) : 0;
    int idle_tick = 0;

    while (!g_int_stop) {
        // Drain events
        uint32_t h = atomic_load_explicit(&g_ev_head, memory_order_relaxed);
        uint32_t t = atomic_load_explicit(&g_ev_tail, memory_order_acquire);
        while (h != t) {
            AllocEvent ev = g_ev_ring[h & EVENTQ_MASK];
            if (ev.class_idx < TINY_NUM_CLASSES) {
                hist[ev.class_idx]++;
                // TODO: use ev.tier_hit/flags/site_id for richer adaptations
            }
            h++;
        }
        atomic_store_explicit(&g_ev_head, h, memory_order_release);

        // Snapshot counts for this window
        for (int k = 0; k < TINY_NUM_CLASSES; k++) { cnt[k] = hist[k]; }

        // Simple adaptive rule: if class seen a lot, increase fill target; else reduce
        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
            int count = cnt[k];
            hist[k] = 0;  // reset for next window
            int cur = atomic_load_explicit(&g_frontend_fill_target[k], memory_order_relaxed);
            if (count > 1000) {
                int nv = cur + 32; if (nv > 256) nv = 256;  // cap
                atomic_store_explicit(&g_frontend_fill_target[k], nv, memory_order_relaxed);
            } else if (count < 200) {
                int nv = cur - 16; if (nv < 0) nv = 0;
                atomic_store_explicit(&g_frontend_fill_target[k], nv, memory_order_relaxed);
            }
        }
        // Stage 1: adjust refill batch bounds by class grouping (hot tiny vs others)
        if (adapt_refill) {
            int hot_sum = 0, other_sum = 0;
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                int cur = atomic_load_explicit(&g_frontend_fill_target[k], memory_order_relaxed);
                if (k <= 3) hot_sum += cur; else other_sum += cur;
            }
            if (hot_sum > 512) {
                int nv = g_tiny_refill_max_hot + 16; if (nv > REFILL_HOT_MAX) nv = REFILL_HOT_MAX; g_tiny_refill_max_hot = nv;
            } else if (hot_sum < 64) {
                int nv = g_tiny_refill_max_hot - 16; if (nv < REFILL_HOT_MIN) nv = REFILL_HOT_MIN; g_tiny_refill_max_hot = nv;
            }
            if (other_sum > 256) {
                int nv = g_tiny_refill_max + 16; if (nv > REFILL_MAX) nv = REFILL_MAX; g_tiny_refill_max = nv;
            } else if (other_sum < 32) {
                int nv = g_tiny_refill_max - 16; if (nv < REFILL_MIN) nv = REFILL_MIN; g_tiny_refill_max = nv;
            }
        }

        // Adapt per-class MAG/SLL caps (light-touch; protects hot classes)
        if (adapt_caps) {
            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                int hot = (k <= 3);
                // Heuristic thresholds per window
                // Hot classes raise caps more aggressively
                int up_th = hot ? 800 : 1000;
                int dn_th = hot ? 120 : 200;
                if (g_tiny_int_tight) { dn_th = hot ? 200 : 300; }

                // MAG cap override: move toward [min..max] within guard rails
                int mag = g_mag_cap_override[k];
                int mag_min;
                switch (k) {
                    case 0: case 1: case 2: mag_min = 128; break; // 8/16/32B
                    case 3: mag_min = 256; break;                  // 64B (allow larger later)
                    case 4: mag_min = 128; break;                  // 128B
                    default: mag_min = 64; break;
                }
                int mag_max = 512; // soft ceiling; global hard ceiling is TINY_TLS_MAG_CAP
                if (k == 3) mag_max = 1024;
                if (mag <= 0) mag = mag_min; // start from baseline

                if (cnt[k] > up_th) { mag += 16; if (mag > mag_max) mag = mag_max; }
                else if (cnt[k] < dn_th) { mag -= 16; if (mag < mag_min) mag = mag_min; }
                g_mag_cap_override[k] = mag;

                // SLL cap override (hot classes only); keep absolute cap modest
                if (hot) {
                    int sll = g_sll_cap_override[k];
                    if (sll <= 0) sll = 256; // starting point for hot classes
                    int sll_min = 128;
                    if (g_tiny_int_tight && g_tiny_cap_floor[k] > 0) sll_min = g_tiny_cap_floor[k];
                    int sll_max = 1024;
                    if (cnt[k] > up_th) { sll += 32; if (sll > sll_max) sll = sll_max; }
                    else if (cnt[k] < dn_th) { sll -= 32; if (sll < sll_min) sll = sll_min; }
                    g_sll_cap_override[k] = sll;
                }
            }
        }
        // Enforce Tiny RSS budget (if enabled): when over budget, shrink per-class caps by step
        if (g_tiny_rss_budget_kb > 0) {
            int rss = get_rss_kb_self();
            if (rss > g_tiny_rss_budget_kb) {
                for (int k = 0; k < TINY_NUM_CLASSES; k++) {
                    int floor = g_tiny_cap_floor[k]; if (floor <= 0) floor = 64;
                    int mag = g_mag_cap_override[k]; if (mag <= 0) mag = tiny_effective_cap(k);
                    mag -= g_tiny_diet_step; if (mag < floor) mag = floor; g_mag_cap_override[k] = mag;
                    // Phase12: SLL cap 調整は g_sll_cap_override ではなくポリシー側が担当するため、ここでは変更しない。
                }
            }
        }
        // Optional periodic idle trim (try to keep overhead small)
        if (idle_trim_ticks > 0) {
            idle_tick++;
            if (idle_tick >= idle_trim_ticks) {
                idle_tick = 0;
                // Optional bounded flush of magazines to enable SS empty detection
                if (idle_flush) hak_tiny_magazine_flush_all();
                // Bounded trim: uses per-class locks briefly; acceptable in background
                hak_tiny_trim();
            }
        }
        usleep(sleep_us);
    }
    return NULL;
}
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								// Background Refill Bin (per-class lock-free SLL) — fills in background so the
 								// front path only does a single CAS pop when both slots/bump are empty.
-												ENV cleanup: Remove BG/HotMag vars & guard fprintf (Larson 52.3M ops/s)

Phase 1 完了：環境変数整理 + fprintf デバッグガード

ENV変数削除（BG/HotMag系）:
- core/hakmem_tiny_init.inc: HotMag ENV 削除 (~131 lines)
- core/hakmem_tiny_bg_spill.c: BG spill ENV 削除
- core/tiny_refill.h: BG remote 固定値化
- core/hakmem_tiny_slow.inc: BG refs 削除

fprintf Debug Guards (#if !HAKMEM_BUILD_RELEASE):
- core/hakmem_shared_pool.c: Lock stats (~18 fprintf)
- core/page_arena.c: Init/Shutdown/Stats (~27 fprintf)
- core/hakmem.c: SIGSEGV init message

ドキュメント整理:
- 328 markdown files 削除（旧レポート・重複docs）

性能確認:
- Larson: 52.35M ops/s (前回52.8M、安定動作✅)
- ENV整理による機能影響なし
- Debug出力は一部残存（次phase で対応）

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-26 14:45:26 +09:00
+								static int g_bg_bin_enable = 0;                // ENV toggle removed (fixed OFF)
 								static int g_bg_bin_target = 128;              // Fixed target (legacy default)
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								static _Atomic uintptr_t g_bg_bin_head[TINY_NUM_CLASSES];
 								static pthread_t g_bg_bin_thread;
 								static volatile int g_bg_bin_stop = 0;
 								static int g_bg_bin_started = 0;
 								// Inline helpers
 								#include "hakmem_tiny_bg_bin.inc.h"
 								// ============================================================================
 								// EXTRACTED TO hakmem_tiny_remote_target.c (Phase 2C-1)
 								// ============================================================================
 								// Targeted remote-drain queue moved to separate module
 								// Functions: remote_target_enqueue(), remote_target_pop()
 								// Variables: g_bg_remote_enable, g_remote_target_head, g_remote_target_len, g_bg_remote_batch
 								// ============================================================================
 								// EXTRACTED TO hakmem_tiny_bg_spill.c/.h (Phase 2C-2)
 								// ============================================================================
 								// Background spill/drain queue for SuperSlab freelist returns
 								// Functions: bg_spill_push_one(), bg_spill_push_chain(), bg_spill_drain_class(), bg_spill_init()
 								// Variables: g_bg_spill_enable, g_bg_spill_target, g_bg_spill_max_batch, g_bg_spill_head[], g_bg_spill_len[]
 								static void* tiny_bg_refill_main(void* arg) {
 								    (void)arg;
 								    const int sleep_us = 1000; // 1ms
 								    while (!g_bg_bin_stop) {
 								        if (!g_bg_bin_enable) { usleep(sleep_us); continue; }
 								        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								            // まずは小クラスだけ対象（シンプルに）
 								            if (!is_hot_class(k)) continue;
 								            int have = bgbin_length_approx(k, g_bg_bin_target);
 								            if (have >= g_bg_bin_target) continue;
 								            int need = g_bg_bin_target - have;
 								            // 生成チェーンを作る（free listやbitmapから、裏で重い処理OK）
 								            void* chain_head = NULL; void* chain_tail = NULL; int built = 0;
 								            pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
 								            pthread_mutex_lock(lock);
 								            TinySlab* slab = g_tiny_pool.free_slabs[k];
 								            // Adopt first slab with free blocks; if none, allocate one
 								            if (!slab) slab = allocate_new_slab(k);
 								            while (need > 0 && slab) {
 								                if (slab->free_count == 0) { slab = slab->next; continue; }
 								                int idx = hak_tiny_find_free_block(slab);
 								                if (idx < 0) { slab = slab->next; continue; }
 								                hak_tiny_set_used(slab, idx);
 								                slab->free_count--;
 								                size_t bs = g_tiny_class_sizes[k];
 								                void* p = (char*)slab->base + (idx * bs);
 								                // prepend to local chain
-												Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets

## Root Cause Analysis (GPT5)

**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)

**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
  - Class 0, 7: next at offset 0 (overwrites header when on freelist)
  - Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
  - All classes: next at offset 0

**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion

## Fixes Applied

### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)

// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```

### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files

Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`

### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage

## Verification (GPT5 Report)

**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`

**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers

**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)

## Technical Details

### Offset Logic Justification
```
Class 0:  8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```

### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports

## Remaining Work

None for Box API offset bugs - all structural issues resolved.

Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-13 06:50:20 +09:00
+								                tiny_next_write(k, p, chain_head);  // Box API: next pointer write
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								                chain_head = p;
 								                if (!chain_tail) chain_tail = p;
 								                built++; need--;
 								            }
 								            pthread_mutex_unlock(lock);
 								            if (built > 0) {
 								                bgbin_push_chain(k, chain_head, chain_tail);
 								            }
 								        }
 								        // Drain background spill queues (SuperSlab freelist return)
 								        // EXTRACTED: Drain logic moved to hakmem_tiny_bg_spill.c (Phase 2C-2)
 								        if (g_bg_spill_enable) {
 								            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								                pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
 								                bg_spill_drain_class(k, lock);
 								            }
 								        }
 								        // Drain remote frees: targeted by per-class queue (avoid scanning all slabs)
 								        if (g_bg_remote_enable) {
 								            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								                int processed = 0;
 								                while (processed < g_bg_remote_batch) {
 								                    TinySlab* s = remote_target_pop(k);
 								                    if (!s) break;
 								                    pthread_mutex_t* lock = &g_tiny_class_locks[k].m;
 								                    pthread_mutex_lock(lock);
 								                    tiny_remote_drain_locked(s);
 								                    pthread_mutex_unlock(lock);
 								                    processed++;
 								                    // If more remain (due to concurrent pushes), the slab may be re-enqueued
 								                    // by producers when threshold is hit again.
 								                }
 								            }
 								        }
 								        usleep(sleep_us);
 								    }
 								    return NULL;
 								}
 								static inline void eventq_push(int class_idx, uint32_t size) {
 								    eventq_push_ex(class_idx, size, HAK_TIER_FRONT, 0, 0, 0);
 								}
 								static void* intelligence_engine_main(void* arg) {
 								    (void)arg;
 								    const int sleep_us = 100000; // 100ms
 								    int hist[TINY_NUM_CLASSES] = {0};
 								    int cnt[TINY_NUM_CLASSES] = {0};
 								    // Tiny の学習は既定でOFF（実アプリは後段で学習）：
 								    // HAKMEM_INT_ADAPT_REFILL=1 / HAKMEM_INT_ADAPT_CAPS=1 を明示設定した場合のみON
 								    int adapt_refill = 0;  // default OFF for Tiny
 								    int adapt_caps = 0;    // default OFF for Tiny (env can enable)
 								    char* arf = getenv("HAKMEM_INT_ADAPT_REFILL");
 								    if (arf) adapt_refill = (atoi(arf) != 0);
 								    char* acp = getenv("HAKMEM_INT_ADAPT_CAPS");
 								    if (acp) adapt_caps = (atoi(acp) != 0);
 								    const int REFILL_MIN = 32, REFILL_MAX = 256;
 								    const int REFILL_HOT_MIN = 96, REFILL_HOT_MAX = 320;
 								    // Tiny diet (memory-tight) knobs
 								    {
 								        char* rb = getenv("HAKMEM_TINY_RSS_BUDGET_KB");
 								        if (rb) { int v = atoi(rb); if (v > 0) g_tiny_rss_budget_kb = v; }
 								        char* st = getenv("HAKMEM_TINY_DIET_STEP");
 								        if (st) { int v = atoi(st); if (v > 0 && v < 256) g_tiny_diet_step = v; }
 								        char* tt = getenv("HAKMEM_TINY_INT_TIGHT");
 								        if (tt) g_tiny_int_tight = (atoi(tt) != 0);
 								        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								            char var[64]; snprintf(var, sizeof(var), "HAKMEM_TINY_CAP_FLOOR_C%d", k);
 								            char* vf = getenv(var);
 								            if (vf) { int v = atoi(vf); if (v > 0 && v < TINY_TLS_MAG_CAP) g_tiny_cap_floor[k] = v; }
 								        }
 								    }
 								    // Idle trim knob
 								    int idle_trim_ms = 0;
 								    int idle_flush = 0; // flush magazines on idle tick (optional)
 								    {
 								        char* it = getenv("HAKMEM_TINY_IDLE_TRIM_MS");
 								        if (it) { int v = atoi(it); if (v > 0) idle_trim_ms = v; }
 								        char* iff = getenv("HAKMEM_TINY_IDLE_FLUSH");
 								        if (iff) idle_flush = (atoi(iff) != 0);
 								    }
 								    int idle_trim_ticks = (idle_trim_ms > 0) ? (idle_trim_ms * 1000 / sleep_us) : 0;
 								    int idle_tick = 0;
 								    while (!g_int_stop) {
 								        // Drain events
 								        uint32_t h = atomic_load_explicit(&g_ev_head, memory_order_relaxed);
 								        uint32_t t = atomic_load_explicit(&g_ev_tail, memory_order_acquire);
 								        while (h != t) {
 								            AllocEvent ev = g_ev_ring[h & EVENTQ_MASK];
 								            if (ev.class_idx < TINY_NUM_CLASSES) {
 								                hist[ev.class_idx]++;
 								                // TODO: use ev.tier_hit/flags/site_id for richer adaptations
 								            }
 								            h++;
 								        }
 								        atomic_store_explicit(&g_ev_head, h, memory_order_release);
 								        // Snapshot counts for this window
 								        for (int k = 0; k < TINY_NUM_CLASSES; k++) { cnt[k] = hist[k]; }
 								        // Simple adaptive rule: if class seen a lot, increase fill target; else reduce
 								        for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								            int count = cnt[k];
 								            hist[k] = 0;  // reset for next window
 								            int cur = atomic_load_explicit(&g_frontend_fill_target[k], memory_order_relaxed);
 								            if (count > 1000) {
 								                int nv = cur + 32; if (nv > 256) nv = 256;  // cap
 								                atomic_store_explicit(&g_frontend_fill_target[k], nv, memory_order_relaxed);
 								            } else if (count < 200) {
 								                int nv = cur - 16; if (nv < 0) nv = 0;
 								                atomic_store_explicit(&g_frontend_fill_target[k], nv, memory_order_relaxed);
 								            }
 								        }
 								        // Stage 1: adjust refill batch bounds by class grouping (hot tiny vs others)
 								        if (adapt_refill) {
 								            int hot_sum = 0, other_sum = 0;
 								            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								                int cur = atomic_load_explicit(&g_frontend_fill_target[k], memory_order_relaxed);
 								                if (k <= 3) hot_sum += cur; else other_sum += cur;
 								            }
 								            if (hot_sum > 512) {
 								                int nv = g_tiny_refill_max_hot + 16; if (nv > REFILL_HOT_MAX) nv = REFILL_HOT_MAX; g_tiny_refill_max_hot = nv;
 								            } else if (hot_sum < 64) {
 								                int nv = g_tiny_refill_max_hot - 16; if (nv < REFILL_HOT_MIN) nv = REFILL_HOT_MIN; g_tiny_refill_max_hot = nv;
 								            }
 								            if (other_sum > 256) {
 								                int nv = g_tiny_refill_max + 16; if (nv > REFILL_MAX) nv = REFILL_MAX; g_tiny_refill_max = nv;
 								            } else if (other_sum < 32) {
 								                int nv = g_tiny_refill_max - 16; if (nv < REFILL_MIN) nv = REFILL_MIN; g_tiny_refill_max = nv;
 								            }
 								        }
 								        // Adapt per-class MAG/SLL caps (light-touch; protects hot classes)
 								        if (adapt_caps) {
 								            for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								                int hot = (k <= 3);
 								                // Heuristic thresholds per window
 								                // Hot classes raise caps more aggressively
 								                int up_th = hot ? 800 : 1000;
 								                int dn_th = hot ? 120 : 200;
 								                if (g_tiny_int_tight) { dn_th = hot ? 200 : 300; }
 								                // MAG cap override: move toward [min..max] within guard rails
 								                int mag = g_mag_cap_override[k];
 								                int mag_min;
 								                switch (k) {
 								                    case 0: case 1: case 2: mag_min = 128; break; // 8/16/32B
 								                    case 3: mag_min = 256; break;                  // 64B (allow larger later)
 								                    case 4: mag_min = 128; break;                  // 128B
 								                    default: mag_min = 64; break;
 								                }
 								                int mag_max = 512; // soft ceiling; global hard ceiling is TINY_TLS_MAG_CAP
 								                if (k == 3) mag_max = 1024;
 								                if (mag <= 0) mag = mag_min; // start from baseline
 								                if (cnt[k] > up_th) { mag += 16; if (mag > mag_max) mag = mag_max; }
 								                else if (cnt[k] < dn_th) { mag -= 16; if (mag < mag_min) mag = mag_min; }
 								                g_mag_cap_override[k] = mag;
 								                // SLL cap override (hot classes only); keep absolute cap modest
 								                if (hot) {
 								                    int sll = g_sll_cap_override[k];
 								                    if (sll <= 0) sll = 256; // starting point for hot classes
 								                    int sll_min = 128;
 								                    if (g_tiny_int_tight && g_tiny_cap_floor[k] > 0) sll_min = g_tiny_cap_floor[k];
 								                    int sll_max = 1024;
 								                    if (cnt[k] > up_th) { sll += 32; if (sll > sll_max) sll = sll_max; }
 								                    else if (cnt[k] < dn_th) { sll -= 32; if (sll < sll_min) sll = sll_min; }
 								                    g_sll_cap_override[k] = sll;
 								                }
 								            }
 								        }
 								        // Enforce Tiny RSS budget (if enabled): when over budget, shrink per-class caps by step
 								        if (g_tiny_rss_budget_kb > 0) {
 								            int rss = get_rss_kb_self();
 								            if (rss > g_tiny_rss_budget_kb) {
 								                for (int k = 0; k < TINY_NUM_CLASSES; k++) {
 								                    int floor = g_tiny_cap_floor[k]; if (floor <= 0) floor = 64;
 								                    int mag = g_mag_cap_override[k]; if (mag <= 0) mag = tiny_effective_cap(k);
 								                    mag -= g_tiny_diet_step; if (mag < floor) mag = floor; g_mag_cap_override[k] = mag;
-												Phase12 debug: restore SUPERSLAB constants/APIs, implement Box2 drain boundary, fix tiny_fast_pop to return BASE, honor TLS SLL toggle in alloc/free fast paths, add fail-fast stubs, and quiet capacity sentinel. Update CURRENT_TASK with A/B results (SLL-off stable; SLL-on crash).

											
										
										
											2025-11-14 01:02:00 +09:00
+								                    // Phase12: SLL cap 調整は g_sll_cap_override ではなくポリシー側が担当するため、ここでは変更しない。
-												Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-11-05 12:31:14 +09:00
+								                }
 								            }
 								        }
 								        // Optional periodic idle trim (try to keep overhead small)
 								        if (idle_trim_ticks > 0) {
 								            idle_tick++;
 								            if (idle_tick >= idle_trim_ticks) {
 								                idle_tick = 0;
 								                // Optional bounded flush of magazines to enable SS empty detection
 								                if (idle_flush) hak_tiny_magazine_flush_all();
 								                // Bounded trim: uses per-class locks briefly; acceptable in background
 								                hak_tiny_trim();
 								            }
 								        }
 								        usleep(sleep_us);
 								    }
 								    return NULL;
 								}