Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00
commit 52386401b3
27144 changed files with 124451 additions and 0 deletions
--- a/benchmarks/src/memset_test.c
+++ b/benchmarks/src/memset_test.c
@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+// Time a single memset() over a freshly malloc'ed 2 MiB buffer and print the
+// elapsed time in milliseconds, followed by the first byte of the buffer as a
+// sanity check on the fill.
+//
+// Returns 0 on success, 1 if the buffer cannot be allocated.
+int main(void) {
+    const size_t len = 2 * 1024 * 1024;
+    struct timespec t1, t2;
+
+    void *buf = malloc(len);
+    if (buf == NULL) {
+        // memset() on a null pointer is undefined behavior; bail out instead.
+        fprintf(stderr, "memset_test: malloc(%zu) failed\n", len);
+        return 1;
+    }
+
+    clock_gettime(CLOCK_MONOTONIC, &t1);
+    memset(buf, 0xEF, len);
+    clock_gettime(CLOCK_MONOTONIC, &t2);
+
+    // Fold the whole-second and nanosecond deltas into one millisecond value.
+    double ms = (t2.tv_sec - t1.tv_sec) * 1000.0 + (t2.tv_nsec - t1.tv_nsec) / 1000000.0;
+
+    printf("memset time: %.3f ms\n", ms);
+    printf("First byte: 0x%02x (should be 0xEF)\n", ((unsigned char*)buf)[0]);
+
+    free(buf);
+    return 0;
+}
--- a/benchmarks/src/pf_test.c
+++ b/benchmarks/src/pf_test.c
@ -0,0 +1,71 @@
+// pf_test.c - Page Fault Investigation
+#include "hakmem.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define SIZE (2 * 1024 * 1024)
+
+// Read this process's page-fault counters from /proc/self/stat.
+//
+//   soft  receives minflt (field 10 of the stat line)
+//   hard  receives majflt (field 12 of the stat line)
+//
+// Both outputs are set to 0 if the file cannot be opened, read, or parsed;
+// a partially parsed line never yields a half-valid pair.
+static void get_pf(unsigned long *soft, unsigned long *hard) {
+    *soft = 0;
+    *hard = 0;
+
+    FILE *f = fopen("/proc/self/stat", "r");
+    if (!f) return;
+
+    char line[1024];
+    char *got = fgets(line, sizeof line, f);
+    fclose(f);
+    if (!got) return;
+
+    // Field 2 (comm) is wrapped in parentheses and may itself contain spaces
+    // or ')', so a plain "%*s" can misalign every later field. Resume parsing
+    // after the LAST ')' on the line instead.
+    const char *rest = strrchr(line, ')');
+    if (!rest) return;
+
+    // Remaining fields in order:
+    //   state ppid pgrp session tty_nr tpgid flags minflt cminflt majflt
+    unsigned long minflt = 0, majflt = 0;
+    int matched = sscanf(rest + 1, " %*c %*d %*d %*d %*d %*d %*u %lu %*u %lu",
+                         &minflt, &majflt);
+    if (matched != 2) return;
+
+    *soft = minflt;
+    *hard = majflt;
+}
+
+// Run up to 10 allocate / fill / free cycles of SIZE bytes through
+// hak_alloc_cs() and hak_free_cs(), then print the change in the soft and
+// hard page-fault counters reported by get_pf() across the loop.
+// hak_init() is called before the first sample and hak_shutdown() after the
+// report is printed.
+void test_hakmem() {
+    unsigned long pf_before, pf_after, hard_before, hard_after;
+
+    printf("\n=== hakmem Test ===\n");
+    hak_init();
+
+    get_pf(&pf_before, &hard_before);
+
+    for (int i = 0; i < 10; i++) {
+        void* buf = hak_alloc_cs(SIZE);
+        if (!buf) {
+            // NOTE(review): assumes hak_alloc_cs() reports failure by returning
+            // NULL - confirm against hakmem.h. Filling a null buffer would be
+            // undefined behavior, so stop iterating; the counts printed below
+            // then cover only the completed iterations.
+            fprintf(stderr, "hak_alloc_cs(%zu) failed on iteration %d\n",
+                    (size_t)SIZE, i);
+            break;
+        }
+        memset(buf, 0xEF, SIZE);
+        hak_free_cs(buf, SIZE);
+    }
+
+    get_pf(&pf_after, &hard_after);
+
+    printf("Page Faults: %lu (soft), %lu (hard)\n",
+           pf_after - pf_before, hard_after - hard_before);
+
+    hak_shutdown();
+}
+
+// Run up to 10 malloc / fill / free cycles of SIZE bytes with the system
+// allocator, then print the change in the soft and hard page-fault counters
+// reported by get_pf() across the loop.
+void test_system() {
+    unsigned long pf_before, pf_after, hard_before, hard_after;
+
+    printf("\n=== system malloc Test ===\n");
+
+    get_pf(&pf_before, &hard_before);
+
+    for (int i = 0; i < 10; i++) {
+        void* buf = malloc(SIZE);
+        if (!buf) {
+            // Filling a null buffer would be undefined behavior, so stop
+            // iterating; the counts printed below then cover only the
+            // completed iterations.
+            fprintf(stderr, "malloc(%zu) failed on iteration %d\n",
+                    (size_t)SIZE, i);
+            break;
+        }
+        memset(buf, 0xEF, SIZE);
+        free(buf);
+    }
+
+    get_pf(&pf_after, &hard_after);
+
+    printf("Page Faults: %lu (soft), %lu (hard)\n",
+           pf_after - pf_before, hard_after - hard_before);
+}
+
+// Entry point: print the comparison banner, then run the system-malloc
+// measurement followed by the hakmem measurement. Always returns 0.
+int main() {
+    fputs("=== Page Fault Comparison ===\n", stdout);
+    fputs("Size: 2MB × 10 iterations\n", stdout);
+
+    // The system allocator is measured first, hakmem second.
+    test_system();
+    test_hakmem();
+
+    return EXIT_SUCCESS;
+}
--- a/benchmarks/src/tiny/phase6/README.md
+++ b/benchmarks/src/tiny/phase6/README.md
@ -0,0 +1,71 @@
+# Phase 6 Tiny Allocator Benchmarks
+
+## 📁 ファイル構成
+
+### ベンチマーク
+- `bench_tiny_simple.c` - Phase 6-1 Ultra-Simple Fast Path ベンチマーク
+  - 10M iterations LIFO pattern
+  - 独立した実装 (`core/hakmem_tiny_simple.{h,c}`)
+  - 既存のHAKMEM allocatorとは**別実装**
+
+### テスト
+- `test_tiny_simple.c` - Phase 6-1 機能テスト
+  - 基本的なalloc/free
+  - LIFO pattern
+  - 全サイズクラステスト
+
+## 🔨 ビルド
+
+```bash
+# Phase 6-1 ベンチマーク
+gcc -O2 -I../../../.. \
+    -o bench_tiny_simple \
+    bench_tiny_simple.c \
+    ../../../../core/hakmem_tiny_simple.c \
+    -lrt
+
+# Phase 6-1 テスト
+gcc -O2 -I../../../.. \
+    -o test_tiny_simple \
+    test_tiny_simple.c \
+    ../../../../core/hakmem_tiny_simple.c \
+    -lrt
+```
+
+## 📊 測定結果 (2025-11-02)
+
+### Phase 6-1: Ultra-Simple Fast Path
+
+**bench_tiny_simple (64B LIFO, 10M iterations):**
+```
+Throughput: 478.60 M ops/sec
+Cycles/op:  4.17 cycles
+Hit rate:   100.00%
+```
+
+**比較:**
+- System glibc: ~156-175 M ops/sec (測定により変動)
+- Phase 6-1: **478.60 M ops/sec** (+174% ~ +207%)
+
+## ⚠️ 重要な注意
+
+**Phase 6-1 は既存のHAKMEM allocatorとは別実装です:**
+
+1. **Phase 6-1 (bench_tiny_simple)**
+   - 新しい独立実装 (`hakmem_tiny_simple.c`)
+   - tcache風の ultra-simple design
+   - 478.60 M ops/sec
+
+2. **既存のHAKMEM (bench_tiny_hot_hakmem)**
+   - 現在のmainline実装
+   - Magazine-based design
+   - ~50-60 M ops/sec
+
+Phase 6-1 は**実験的プロトタイプ**で、まだmainlineには統合されていません。
+
+## 🎯 Phase 6 ロードマップ
+
+- [x] **Phase 6-1**: Ultra-Simple Fast Path (完了)
+- [ ] **Phase 6-2**: Comprehensive benchmarking
+- [ ] **Phase 6-3**: Learning layer implementation
+- [ ] **Phase 6-4**: Mainline integration (未定)