// mid_v35_hotpath_env_box.h - Phase MID-V35-HOTPATH-OPT-1 ENV gates
//
// Three environment-variable switches, each read with getenv() on first use
// and then served from a cached int:
//
//   Step 1: HAKMEM_MID_V35_HEADER_PREFILL (default 0)
//     - ON:  prefill headers at refill boundary, skip in hot path
//   Step 2: HAKMEM_MID_V35_HOT_COUNTS (default 1)
//     - OFF: skip alloc_count++ in hot path (free_count/retire kept)
//   Step 3: HAKMEM_MID_V35_C6_FASTPATH (default 0)
//     - ON:  use specialized C6 fast path with constant slot size
//
// Parsing rule (only the FIRST character of the value is inspected):
//   - default-OFF knobs turn ON  only when the value starts with '1'
//   - default-ON  knobs turn OFF only when the value starts with '0'
//   - unset, empty, or any other value keeps the default
//
// Every function here is `static inline` with a function-local `static`
// cache, so each translation unit including this header keeps its own cached
// copy. The cache is a plain int with no synchronization in this header.
//
#ifndef HAKMEM_MID_V35_HOTPATH_ENV_BOX_H
#define HAKMEM_MID_V35_HOTPATH_ENV_BOX_H

#include <stdbool.h>
#include <stdlib.h>

// Pure parser shared by all gates.
//   value      - raw environment string, may be NULL (variable unset)
//   default_on - non-zero if the knob is ON when unset/unrecognized
// Returns 1 (enabled) or 0 (disabled).
static inline int mid_v35_env_flag_parse(const char *value, int default_on) {
    if (value == NULL) {
        return default_on ? 1 : 0;
    }
    if (default_on) {
        // Default-ON knob: only an explicit leading '0' disables it.
        return (*value == '0') ? 0 : 1;
    }
    // Default-OFF knob: only an explicit leading '1' enables it.
    return (*value == '1') ? 1 : 0;
}

// Lazy, cached lookup of one gate.
//   cache      - per-gate storage; negative means "not resolved yet"
//   name       - environment variable name passed to getenv()
//   default_on - see mid_v35_env_flag_parse()
// Returns true when the gate resolves to enabled.
static inline bool mid_v35_env_flag_cached(int *cache, const char *name,
                                           int default_on) {
    int state = *cache;
    if (__builtin_expect(state < 0, 0)) {
        // First call through this cache: consult the environment once.
        state = mid_v35_env_flag_parse(getenv(name), default_on);
        *cache = state;
    }
    return state == 1;
}

// Step 1: Header prefill at refill boundary
// Default OFF (conservative)
static inline bool mid_v35_header_prefill_enabled(void) {
    static int g_enabled = -1;
    return mid_v35_env_flag_cached(&g_enabled,
                                   "HAKMEM_MID_V35_HEADER_PREFILL", 0);
}

// Step 2: Hot counts (alloc_count++ in hot path)
// Default ON (for compatibility/correctness)
static inline bool mid_v35_hot_counts_enabled(void) {
    static int g_enabled = -1;
    return mid_v35_env_flag_cached(&g_enabled,
                                   "HAKMEM_MID_V35_HOT_COUNTS", 1);
}

// Step 3: C6 specialized fast path
// Default OFF (conservative)
static inline bool mid_v35_c6_fastpath_enabled(void) {
    static int g_enabled = -1;
    return mid_v35_env_flag_cached(&g_enabled,
                                   "HAKMEM_MID_V35_C6_FASTPATH", 0);
}

#endif // HAKMEM_MID_V35_HOTPATH_ENV_BOX_H
// smallobject_mid_v35_geom_box.h - Phase MID-V35-HOTPATH-OPT-1 Step 0
//
// Single Source of Truth for MID v3.5 geometry (C5-C7).
//
// Geometry (64KB page):
//   C5:  384B slots, 170 slots/page (257-384B)
//   C6:  512B slots, 128 slots/page (385-512B)
//   C7: 1024B slots,  64 slots/page (513-1024B)
//
// The slot-size table in mid_v35_slot_size() is the ONLY hand-maintained
// table. Slots-per-page and class validity are derived from it, so the three
// helpers cannot disagree with each other.
//
#ifndef HAKMEM_SMALLOBJECT_MID_V35_GEOM_BOX_H
#define HAKMEM_SMALLOBJECT_MID_V35_GEOM_BOX_H

#include <stddef.h>
#include <stdint.h>

// Page size for MID v3.5 (64KB)
#define MID_V35_PAGE_SIZE (64 * 1024)

// Slot size in bytes for a class index.
// Returns 0 for any class outside C5-C7.
static inline size_t mid_v35_slot_size(uint32_t class_idx) {
    switch (class_idx) {
    case 5:
        return 384;   // C5: 257-384B
    case 6:
        return 512;   // C6: 385-512B
    case 7:
        return 1024;  // C7: 513-1024B
    default:
        return 0;
    }
}

// Number of whole slots that fit in one MID_V35_PAGE_SIZE page.
// Returns 0 for any class outside C5-C7.
// Integer division rounds down: C5 -> 170 (256 bytes of the page are left
// over), C6 -> 128, C7 -> 64.
static inline uint32_t mid_v35_slots_per_page(uint32_t class_idx) {
    size_t slot_size = mid_v35_slot_size(class_idx);
    if (slot_size == 0) {
        return 0;  // unsupported class; also guards the division below
    }
    return (uint32_t)((size_t)MID_V35_PAGE_SIZE / slot_size);
}

// Non-zero (1) when class_idx is a MID v3.5 class (C5-C7), else 0.
static inline int mid_v35_class_valid(uint32_t class_idx) {
    return mid_v35_slot_size(class_idx) != 0;
}

#endif // HAKMEM_SMALLOBJECT_MID_V35_GEOM_BOX_H
============================================================================ // Helper: class_idx to slots // ============================================================================ - -static uint32_t class_idx_to_slots(uint32_t class_idx) { - switch (class_idx) { - case 5: return 170; // C5: 257-384B - case 6: return 102; // C6: 385-640B - case 7: return 64; // C7: 641-1024B - default: return 0; - } -} +// Phase MID-V35-HOTPATH-OPT-1: Use geom_box as Single Source of Truth +// See: core/box/smallobject_mid_v35_geom_box.h +// (Removed local class_idx_to_slots() which had wrong C6 value: 102 instead of 128) // ============================================================================ // Cold Interface Implementation @@ -63,10 +60,21 @@ SmallPageMeta_MID_v3* small_cold_mid_v3_refill_page(uint32_t class_idx) { // Initialize page for allocation page->class_idx = class_idx; - page->capacity = class_idx_to_slots(class_idx); + page->capacity = mid_v35_slots_per_page(class_idx); page->alloc_count = 0; page->free_count = 0; + // Phase MID-V35-HOTPATH-OPT-1 Step 1: Header prefill at refill boundary + // When enabled, write all headers now so hot path can skip per-alloc header writes + if (mid_v35_header_prefill_enabled()) { + size_t slot_size = mid_v35_slot_size(class_idx); + uint32_t cap = page->capacity; + uint8_t *p = (uint8_t*)page_ptr; + for (uint32_t i = 0; i < cap; i++) { + tiny_region_id_write_header(p + i * slot_size, class_idx); + } + } + return page; } diff --git a/core/smallobject_mid_v35.c b/core/smallobject_mid_v35.c index 58c58cac..9ce4d41d 100644 --- a/core/smallobject_mid_v35.c +++ b/core/smallobject_mid_v35.c @@ -13,6 +13,8 @@ #include "box/smallobject_mid_v35_box.h" #include "box/smallobject_segment_mid_v3_box.h" #include "box/smallobject_cold_iface_mid_v3_box.h" +#include "box/smallobject_mid_v35_geom_box.h" // Phase MID-V35-HOTPATH-OPT-1: geometry SSOT +#include "box/mid_v35_hotpath_env_box.h" // Phase MID-V35-HOTPATH-OPT-1: Step 1-3 ENV gates 
#include "tiny_region_id.h" // For tiny_region_id_write_header // SmallPageMeta is defined in smallobject_segment_mid_v3_box.h @@ -34,26 +36,8 @@ static __thread SmallMidV35TlsCtx tls_mid_v35_ctx = {0}; // ============================================================================ // Slot Configuration (C5/C6/C7) // ============================================================================ - -// Slot sizes for C5, C6, C7 -static const size_t g_slot_sizes[8] = { - 0, // C0: not used - 0, // C1: not used - 0, // C2: not used - 0, // C3: not used - 0, // C4: not used (ULTRA handles this) - 384, // C5: 257-384 bytes → 384 byte slots - 512, // C6: 385-512 bytes → 512 byte slots - 1024, // C7: 513-1024 bytes → 1024 byte slots (ULTRA handles this) -}; - -// Slots per 64KB page -static const uint32_t g_slots_per_page[8] = { - 0, 0, 0, 0, 0, - 170, // C5: 65536 / 384 = 170 - 128, // C6: 65536 / 512 = 128 - 64, // C7: 65536 / 1024 = 64 -}; +// Phase MID-V35-HOTPATH-OPT-1: Use geom_box as Single Source of Truth +// See: core/box/smallobject_mid_v35_geom_box.h // ============================================================================ // Init @@ -74,26 +58,57 @@ void* small_mid_v35_alloc(uint32_t class_idx, size_t size) { SmallMidV35TlsCtx *ctx = &tls_mid_v35_ctx; - // Fast path: allocate from TLS cached page + // ======================================================================== + // Step 3: C6 specialized fast path (constant slot size = 512) + // ======================================================================== + if (mid_v35_c6_fastpath_enabled() && class_idx == 6) { + void *page = ctx->page[6]; + uint32_t off = ctx->offset[6]; + if (page && off < ctx->capacity[6]) { + // C6: slot_size = 512 (constant, compiler can optimize) + void *base = (char*)page + off * 512; + ctx->offset[6] = off + 1; + + // Step 2: HOT_COUNTS gate + if (mid_v35_hot_counts_enabled() && ctx->meta[6]) { + ctx->meta[6]->alloc_count++; + } + + // Step 1: HEADER_PREFILL gate + if 
(!mid_v35_header_prefill_enabled()) { + tiny_region_id_write_header(base, 6); + } + + return (char*)base + 1; + } + // Fall through to slow path + } + + // ======================================================================== + // Generic fast path: allocate from TLS cached page + // ======================================================================== if (ctx->page[class_idx] && ctx->offset[class_idx] < ctx->capacity[class_idx]) { - size_t slot_size = g_slot_sizes[class_idx]; + size_t slot_size = mid_v35_slot_size(class_idx); void *base = (char*)ctx->page[class_idx] + ctx->offset[class_idx] * slot_size; ctx->offset[class_idx]++; - // Update page metadata - if (ctx->meta[class_idx]) { + // Step 2: HOT_COUNTS gate - Update page metadata + if (mid_v35_hot_counts_enabled() && ctx->meta[class_idx]) { ctx->meta[class_idx]->alloc_count++; } - // Write header (1-byte Tiny header with class_idx) - // Note: Assumes HAKMEM_TINY_HEADER_CLASSIDX is enabled - tiny_region_id_write_header(base, class_idx); + // Step 1: HEADER_PREFILL gate - Write header if not prefilled + if (!mid_v35_header_prefill_enabled()) { + tiny_region_id_write_header(base, class_idx); + } // Return USER pointer (BASE + 1 byte header) return (char*)base + 1; } + // ======================================================================== // Slow path: need new page via ColdIface + // ======================================================================== SmallPageMeta_MID_v3 *page = small_cold_mid_v3_refill_page(class_idx); if (!page) { // Fallback to legacy or return NULL @@ -103,14 +118,18 @@ void* small_mid_v35_alloc(uint32_t class_idx, size_t size) { // Update TLS cache ctx->page[class_idx] = page->ptr; ctx->offset[class_idx] = 1; // First slot already allocated - ctx->capacity[class_idx] = g_slots_per_page[class_idx]; + ctx->capacity[class_idx] = mid_v35_slots_per_page(class_idx); ctx->meta[class_idx] = page; - // Record first allocation in page metadata - page->alloc_count = 1; + // Step 2: 
HOT_COUNTS gate - Record first allocation in page metadata + if (mid_v35_hot_counts_enabled()) { + page->alloc_count = 1; + } - // Write header for first slot - tiny_region_id_write_header(page->ptr, class_idx); + // Step 1: HEADER_PREFILL gate - Write header for first slot if not prefilled + if (!mid_v35_header_prefill_enabled()) { + tiny_region_id_write_header(page->ptr, class_idx); + } // Return first slot (USER pointer) return (char*)page->ptr + 1; diff --git a/core/smallobject_segment_mid_v3.c b/core/smallobject_segment_mid_v3.c index 180d6625..fcf3979c 100644 --- a/core/smallobject_segment_mid_v3.c +++ b/core/smallobject_segment_mid_v3.c @@ -7,6 +7,7 @@ #include #include "box/smallobject_segment_mid_v3_box.h" #include "box/region_id_v6_box.h" +#include "box/smallobject_mid_v35_geom_box.h" // Phase MID-V35-HOTPATH-OPT-1: geometry SSOT // SmallPageMeta is now defined in smallobject_segment_mid_v3_box.h @@ -21,20 +22,9 @@ // ============================================================================ // Helper: class_idx to slots mapping // ============================================================================ - -static uint32_t class_idx_to_slots(uint32_t class_idx) { - // MID v3.5 targets C5-C7 (257-1024B) - // C5: 257-384B → 64KiB / 384B ≈ 170 slots - // C6: 385-640B → 64KiB / 640B ≈ 102 slots - // C7: 641-1024B → 64KiB / 1024B ≈ 64 slots - - switch (class_idx) { - case 5: return 170; // C5 - case 6: return 102; // C6 - case 7: return 64; // C7 - default: return 0; - } -} +// Phase MID-V35-HOTPATH-OPT-1: Use geom_box as Single Source of Truth +// See: core/box/smallobject_mid_v35_geom_box.h +// (Removed local class_idx_to_slots() which had wrong C6 value: 102 instead of 128) // ============================================================================ // Segment Lifecycle diff --git a/docs/analysis/ENV_PROFILE_PRESETS.md b/docs/analysis/ENV_PROFILE_PRESETS.md index ddb83bf1..1a150700 100644 --- a/docs/analysis/ENV_PROFILE_PRESETS.md +++ 
b/docs/analysis/ENV_PROFILE_PRESETS.md @@ -48,6 +48,19 @@ HAKMEM_TINY_HEAP_STATS=1 HAKMEM_TINY_HEAP_STATS_DUMP=1 HAKMEM_SMALL_HEAP_V3_STATS=1 ``` +- **Phase MID-V35-HOTPATH-OPT-1** (FROZEN - research only): +```sh +HAKMEM_MID_V35_HEADER_PREFILL=1 # refill境界でheader先行書き +HAKMEM_MID_V35_HOT_COUNTS=0 # alloc_count削除 +HAKMEM_MID_V35_C6_FASTPATH=1 # C6特化 fast path +``` + - **Status**: FROZEN; all 3 optimizations are disabled by default (HEADER_PREFILL=0, HOT_COUNTS=1, C6_FASTPATH=0) + - **Actual Results** (Phase MID-V35-HOTPATH-OPT-1 Mixed A/B): + - Mixed (16–1024B, MID_V35_OFF): **-0.2%** (誤差範囲, ±2%以内) ✓ + - C6-heavy (257–768B, MID_V35_ON): **+7.3%** improvement ✅ + - **Finding**: Mixed は MID_V3(C6-only) 固定で効果微小;C6-heavy のみ効果大 + - **Recommendation**: C6_HEAVY_LEGACY_POOLV1 プリセットで推奨ON + - **NOT recommended for**: MIXED_TINYV3_C7_SAFE mainline (leave all 3 knobs unset so the optimizations stay disabled; note HOT_COUNTS=0 would ENABLE its optimization) - **Phase POLICY-FAST-PATH-V2** (FROZEN - research only): ```sh HAKMEM_TINY_FREE_POLICY_FAST_V2=1 # Fast-path free optimization @@ -100,6 +113,12 @@ HAKMEM_MID_V3_ENABLED=1 # Phase MID-V3: 257-768B, C6 only HAKMEM_MID_V3_CLASSES=0x40 # C6 only (+11% on C6-heavy) HAKMEM_MID_V35_ENABLED=1 # Phase v11a-5: C6-heavy で +8% 改善 HAKMEM_MID_V35_CLASSES=0x40 # C6 only (53.1M ops/s) + +# Phase MID-V35-HOTPATH-OPT-1: C6-heavy 最速セット(推奨ON) +# 機能: header prefill + hot counts削除 + C6 fast path (組み合わせで +7.3%) +HAKMEM_MID_V35_HEADER_PREFILL=1 # refill境界でheader先行書き +HAKMEM_MID_V35_HOT_COUNTS=0 # alloc_count削除(free_count/retire残す) +HAKMEM_MID_V35_C6_FASTPATH=1 # C6特化 fast path (constant slot_size=512) ``` - mid_desc_lookup TLS キャッシュを試すときだけ: `HAKMEM_MID_DESC_CACHE_ENABLED=1` を上乗せ(デフォルトは OFF)。 diff --git a/perf.data.c6heavy_baseline b/perf.data.c6heavy_baseline new file mode 100644 index 00000000..a265aac7 Binary files /dev/null and b/perf.data.c6heavy_baseline differ diff --git a/perf.data.mixed_baseline b/perf.data.mixed_baseline new file mode 100644 index 00000000..3db92825 Binary files /dev/null and b/perf.data.mixed_baseline differ