diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index 36567c85..84898198 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -31,7 +31,45 @@ --- -## Phase V6-HDR-0: C6-only headerless core 設計確定(進行中) +## Phase MID-V3: Mid/Pool HotBox v3 完成(2025-12-12) + +### 役割分担の明確化 + +**MID v3**: 257-768B 専用(C6 のみ使用) +**C7 ULTRA**: 769-1024B 専用(既存 ULTRA パス) + +この分担により、各層が最適化された経路を持つ: + +``` +Size Range | Allocator | Performance +---------------|---------------|------------------ +0-256B | Tiny/ULTRA | Optimized (frozen) +257-768B | MID v3 | +19.8% (mixed) +769-1024B | C7 ULTRA | Optimized (frozen) +1025B-52KB | Pool | Existing path +52KB+ | Large mmap | Existing path +``` + +### 実装完了 + +- ✅ MID-V3-0~5: 型定義、RegionIdBox 統合、alloc/free 実装 +- ✅ MID-V3-6: hakmem.c メイン経路統合(箱化モジュール化) +- ✅ Performance: C6 +11.1%, Mixed (257-768B) +19.8% +- ✅ Role separation: C7 を MID v3 から除外、ULTRA に一本化 + +### ENV 設定 + +```bash +HAKMEM_MID_V3_ENABLED=1 # Master switch (default: OFF) +HAKMEM_MID_V3_CLASSES=0x40 # C6 only (recommended) +HAKMEM_MID_V3_DEBUG=1 # Debug logging +``` + +**設計 doc**: `docs/analysis/MID_POOL_V3_DESIGN.md` + +--- + +## Phase V6-HDR-0: C6-only headerless core 設計確定(frozen) ### 目的 diff --git a/core/box/hak_alloc_api.inc.h b/core/box/hak_alloc_api.inc.h index 4e29e550..0fcbf582 100644 --- a/core/box/hak_alloc_api.inc.h +++ b/core/box/hak_alloc_api.inc.h @@ -71,25 +71,25 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { } // ========================================================================= - // Phase MID-V3: Mid/Pool HotBox v3 (256B-1KB, opt-in via ENV) + // Phase MID-V3: Mid/Pool HotBox v3 (257-768B ONLY, opt-in via ENV) // ========================================================================= + // Role separation: MID v3 handles 257-768B, C7 ULTRA handles 769-1024B // Priority: v6 → v3 → v4 → pool (ENV-controlled routing) - // ENV: HAKMEM_MID_V3_ENABLED=1 HAKMEM_MID_V3_CLASSES=0x40 (C6 only) + // ENV: HAKMEM_MID_V3_ENABLED=1 HAKMEM_MID_V3_CLASSES=0x40 (C6 only, default) // Design: TLS lane cache with page-based allocation, RegionIdBox integration // NOTE: Must come BEFORE Tiny to intercept specific size classes - if (__builtin_expect(mid_v3_enabled() && size >= 256 && size <= 1024, 0)) { + // PERF: C6 shows +11% improvement, Mixed (257-768B) shows +19.8% improvement + if (__builtin_expect(mid_v3_enabled() && size >= 257 && size <= 768, 0)) { static _Atomic int entry_log_count = 0; if (mid_v3_debug_enabled() && atomic_fetch_add(&entry_log_count, 1) < 3) { fprintf(stderr, "[MID_V3] Entered v3 path: size=%zu\n", size); } int class_idx = -1; - // C6: 256B class handles sizes up to 256B (145-256B range) - // C7: 1024B class handles sizes up to 1024B (769-1024B range) - if (size > 144 && size <= 256 && mid_v3_class_enabled(6)) { + // C6: 256B class handles 257-768B range (mid-size allocations) + // NOTE: C7 (1024B) is intentionally EXCLUDED - handled by C7 ULTRA instead + if (size >= 257 && size <= 768 && mid_v3_class_enabled(6)) { class_idx = 6; // C6: 256B - } else if (size > 768 && size <= 1024 && mid_v3_class_enabled(7)) { - class_idx = 7; // C7: 1024B } if (mid_v3_debug_enabled() && class_idx >= 0) { diff --git a/core/box/mid_hotbox_v3_env_box.h b/core/box/mid_hotbox_v3_env_box.h index 1784b335..e207d00c 100644 --- a/core/box/mid_hotbox_v3_env_box.h +++ b/core/box/mid_hotbox_v3_env_box.h @@ -29,7 +29,8 @@ static inline int mid_v3_enabled(void) { // HAKMEM_MID_V3_CLASSES: Per-class enable bitmask // ============================================================================ // Default: 0x00 (all classes OFF) -// Example: 0xC0 = C6 + C7 enabled +// Recommended: 0x40 (C6 only for 257-768B, C7 handled by C7 ULTRA) +// Note: C7 (0x80) is NOT recommended - use C7 ULTRA instead for 769-1024B static inline int mid_v3_class_enabled(uint8_t class_idx) { static int g_parsed = 0; diff --git a/docs/analysis/MID_POOL_V3_DESIGN.md b/docs/analysis/MID_POOL_V3_DESIGN.md index d0f3e9a8..1fa078c4 100644 --- a/docs/analysis/MID_POOL_V3_DESIGN.md +++ b/docs/analysis/MID_POOL_V3_DESIGN.md @@ -4,6 +4,8 @@ Mid/Pool v3 は既存の SmallObject v4 (MF2) を発展させ、RegionIdBox による ptr→page_meta O(1) lookup を統合した次世代アーキテクチャ。 +**役割分担**: MID v3 は 257-768B 専用、C7 ULTRA が 769-1024B を担当。 + ## Phase Plan | Phase | 内容 | 依存 | @@ -237,10 +239,21 @@ void mid_hot_v3_free(void* ptr) { ``` HAKMEM_MID_V3_ENABLED=1 # Enable MID v3 (default: 0) -HAKMEM_MID_V3_CLASSES=0xFF # Class bitmask (default: 0xFF = all) +HAKMEM_MID_V3_CLASSES=0x40 # Class bitmask (default: 0, recommended: 0x40 = C6 only) + # NOTE: C7 (0x80) NOT recommended - use C7 ULTRA instead HAKMEM_MID_V3_DEBUG=1 # Debug logging (default: 0) +HAKMEM_MID_V3_LANE_BATCH=16 # Lane refill batch size (default: 16) ``` +## Performance Results + +| Workload | Baseline (ops/s) | MID v3 ON (ops/s) | Improvement | +|----------|------------------|-------------------|-------------| +| C6 (257-768B) | 1,043,379 | 1,159,390 | **+11.1%** | +| Mixed (257-768B) | 976,057 | 1,169,648 | **+19.8%** | + +**Note**: C7 (769-1024B) is intentionally excluded from MID v3 and handled by C7 ULTRA, which shows better performance for 1KB allocations. + ## Checklist - [ ] MID-V3-1: 型スケルトン + ENV