Phase MID-V35-HOTPATH-OPT-1 complete: +7.3% on C6-heavy

Step 0: Geometry SSOT
  - New: core/box/smallobject_mid_v35_geom_box.h (L1/L2 consistency)
  - Fix: C6 slots/page 102→128 in L2 (smallobject_cold_iface_mid_v3.c)
  - Applied: smallobject_mid_v35.c, smallobject_segment_mid_v3.c

Step 1-3: ENV gates for hotpath optimizations
  - New: core/box/mid_v35_hotpath_env_box.h
    * HAKMEM_MID_V35_HEADER_PREFILL (default 0)
    * HAKMEM_MID_V35_HOT_COUNTS (default 1 = hot-path alloc_count++ kept, i.e. optimization off)
    * HAKMEM_MID_V35_C6_FASTPATH (default 0)
  - Implementation: smallobject_mid_v35.c
    * Header prefill at refill boundary (Step 1)
    * Gated alloc_count++ in hot path (Step 2)
    * C6 specialized fast path with constant slot_size (Step 3)

A/B Results:
  C6-heavy (257–768B): 8.75M→9.39M ops/s (+7.3%, 5-run mean) 
  Mixed (16–1024B): 9.98M→9.96M ops/s (-0.2%, within noise) ✓

Decision: FROZEN - optimizations default OFF; recommended ON for C6-heavy workloads, Mixed kept as-is
Documentation: ENV_PROFILE_PRESETS.md updated

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-12 19:19:25 +09:00
parent e95e61f0ff
commit fe70e3baf5
9 changed files with 211 additions and 57 deletions

View File

@ -13,6 +13,8 @@
#include "box/smallobject_mid_v35_box.h"
#include "box/smallobject_segment_mid_v3_box.h"
#include "box/smallobject_cold_iface_mid_v3_box.h"
#include "box/smallobject_mid_v35_geom_box.h" // Phase MID-V35-HOTPATH-OPT-1: geometry SSOT
#include "box/mid_v35_hotpath_env_box.h" // Phase MID-V35-HOTPATH-OPT-1: Step 1-3 ENV gates
#include "tiny_region_id.h" // For tiny_region_id_write_header
// SmallPageMeta is defined in smallobject_segment_mid_v3_box.h
@ -34,26 +36,8 @@ static __thread SmallMidV35TlsCtx tls_mid_v35_ctx = {0};
// ============================================================================
// Slot Configuration (C5/C6/C7)
// ============================================================================
// Slot sizes for C5, C6, C7
static const size_t g_slot_sizes[8] = {
0, // C0: not used
0, // C1: not used
0, // C2: not used
0, // C3: not used
0, // C4: not used (ULTRA handles this)
384, // C5: 257-384 bytes → 384 byte slots
512, // C6: 385-512 bytes → 512 byte slots
1024, // C7: 513-1024 bytes → 1024 byte slots (ULTRA handles this)
};
// Slots per 64KB page
static const uint32_t g_slots_per_page[8] = {
0, 0, 0, 0, 0,
170, // C5: 65536 / 384 = 170
128, // C6: 65536 / 512 = 128
64, // C7: 65536 / 1024 = 64
};
// Phase MID-V35-HOTPATH-OPT-1: Use geom_box as Single Source of Truth
// See: core/box/smallobject_mid_v35_geom_box.h
// ============================================================================
// Init
@ -74,26 +58,57 @@ void* small_mid_v35_alloc(uint32_t class_idx, size_t size) {
SmallMidV35TlsCtx *ctx = &tls_mid_v35_ctx;
// Fast path: allocate from TLS cached page
// ========================================================================
// Step 3: C6 specialized fast path (constant slot size = 512)
// ========================================================================
if (mid_v35_c6_fastpath_enabled() && class_idx == 6) {
void *page = ctx->page[6];
uint32_t off = ctx->offset[6];
if (page && off < ctx->capacity[6]) {
// C6: slot_size = 512 (constant, compiler can optimize)
void *base = (char*)page + off * 512;
ctx->offset[6] = off + 1;
// Step 2: HOT_COUNTS gate
if (mid_v35_hot_counts_enabled() && ctx->meta[6]) {
ctx->meta[6]->alloc_count++;
}
// Step 1: HEADER_PREFILL gate
if (!mid_v35_header_prefill_enabled()) {
tiny_region_id_write_header(base, 6);
}
return (char*)base + 1;
}
// Fall through to slow path
}
// ========================================================================
// Generic fast path: allocate from TLS cached page
// ========================================================================
if (ctx->page[class_idx] && ctx->offset[class_idx] < ctx->capacity[class_idx]) {
size_t slot_size = g_slot_sizes[class_idx];
size_t slot_size = mid_v35_slot_size(class_idx);
void *base = (char*)ctx->page[class_idx] + ctx->offset[class_idx] * slot_size;
ctx->offset[class_idx]++;
// Update page metadata
if (ctx->meta[class_idx]) {
// Step 2: HOT_COUNTS gate - Update page metadata
if (mid_v35_hot_counts_enabled() && ctx->meta[class_idx]) {
ctx->meta[class_idx]->alloc_count++;
}
// Write header (1-byte Tiny header with class_idx)
// Note: Assumes HAKMEM_TINY_HEADER_CLASSIDX is enabled
tiny_region_id_write_header(base, class_idx);
// Step 1: HEADER_PREFILL gate - Write header if not prefilled
if (!mid_v35_header_prefill_enabled()) {
tiny_region_id_write_header(base, class_idx);
}
// Return USER pointer (BASE + 1 byte header)
return (char*)base + 1;
}
// ========================================================================
// Slow path: need new page via ColdIface
// ========================================================================
SmallPageMeta_MID_v3 *page = small_cold_mid_v3_refill_page(class_idx);
if (!page) {
// Fallback to legacy or return NULL
@ -103,14 +118,18 @@ void* small_mid_v35_alloc(uint32_t class_idx, size_t size) {
// Update TLS cache
ctx->page[class_idx] = page->ptr;
ctx->offset[class_idx] = 1; // First slot already allocated
ctx->capacity[class_idx] = g_slots_per_page[class_idx];
ctx->capacity[class_idx] = mid_v35_slots_per_page(class_idx);
ctx->meta[class_idx] = page;
// Record first allocation in page metadata
page->alloc_count = 1;
// Step 2: HOT_COUNTS gate - Record first allocation in page metadata
if (mid_v35_hot_counts_enabled()) {
page->alloc_count = 1;
}
// Write header for first slot
tiny_region_id_write_header(page->ptr, class_idx);
// Step 1: HEADER_PREFILL gate - Write header for first slot if not prefilled
if (!mid_v35_header_prefill_enabled()) {
tiny_region_id_write_header(page->ptr, class_idx);
}
// Return first slot (USER pointer)
return (char*)page->ptr + 1;