Phase v7-5a: Hot path stats removal (C6 v7 極限最適化)
- Remove per-page stats from hot path (alloc_count, free_count, live_current)
- Add ENV-gated global atomic stats (HAKMEM_V7_HOT_STATS)
- Stats now collected only at retire time (cold path)
- Header write kept at alloc time (freelist overlaps block[0])

A/B Result: -4.3% overhead → ±0% (target: legacy ±2%)
v7 OFF avg: 9.26M ops/s, v7 ON avg: 9.27M ops/s (+0.15%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -1,9 +1,14 @@
|
||||
// smallobject_hotbox_v7_box.h - SmallObject HotBox v7 (Phase v7-2: C6-only impl)
|
||||
// smallobject_hotbox_v7_box.h - SmallObject HotBox v7 (Phase v7-5a: Hot path極限最適化)
|
||||
//
|
||||
// Role:
|
||||
// - SmallObject v7 fast path for alloc/free
|
||||
// - C6-only implementation (512B blocks, 64KiB pages, 2MiB segments)
|
||||
// - Uses SmallHeapCtx_v7 + SmallSegment_v7 + ColdIface_v7
|
||||
//
|
||||
// v7-5a optimizations:
|
||||
// - Stats (alloc_count, free_count, live_current) removed from hot path
|
||||
// - Global atomic stats gated by ENV (HAKMEM_V7_HOT_STATS)
|
||||
// - Header write kept (required due to intrusive freelist overlapping block[0])
|
||||
|
||||
#pragma once
|
||||
|
||||
@ -11,6 +16,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h> // for getenv()
|
||||
#include "smallsegment_v7_box.h"
|
||||
#include "smallobject_cold_iface_v7_box.h"
|
||||
#include "region_id_v6_box.h"
|
||||
@ -22,7 +28,7 @@
|
||||
#endif
|
||||
|
||||
// ============================================================================
|
||||
// Debug/Observe Support
|
||||
// Debug/Observe Support (v7-5a: ENV-gated for hot path)
|
||||
// ============================================================================
|
||||
|
||||
// V7 stats functions (defined in smallobject_cold_iface_v7.c)
|
||||
@ -31,6 +37,23 @@ extern void small_v7_stat_free(void);
|
||||
extern void small_v7_stat_refill(void);
|
||||
extern void small_v7_stat_retire(void);
|
||||
|
||||
// v7-5a: ENV gate for hot path stats (default OFF for performance).
//
// Returns 1 when the environment variable HAKMEM_V7_HOT_STATS is set to a
// non-empty value whose first character is not '0', otherwise 0.
//
// The environment is consulted on the first call only; the answer is cached
// in a function-local static, so later changes to the variable are ignored.
// Because this is a `static inline` function, each translation unit that
// includes this header keeps its own cached copy.
//
// The cache is accessed with relaxed atomic builtins: two threads may both
// reach the first call at the same time, and plain loads/stores there would
// be a data race. Racing initializers all store the same value, so no
// stronger ordering is needed.
static inline int small_v7_hot_stats_enabled(void) {
    static int g_enabled = -1;  // -1 = not resolved yet, 0 = off, 1 = on

    int enabled = __atomic_load_n(&g_enabled, __ATOMIC_RELAXED);
    if (__builtin_expect(enabled < 0, 0)) {
        const char *e = getenv("HAKMEM_V7_HOT_STATS");
        enabled = (e != NULL && *e != '\0' && *e != '0') ? 1 : 0;
        __atomic_store_n(&g_enabled, enabled, __ATOMIC_RELAXED);
    }
    return enabled;
}
|
||||
|
||||
// Hot-path stat hooks: each expands to a single statement that calls the
// matching small_v7_stat_*() function only when small_v7_hot_stats_enabled()
// returns non-zero. The branch is hinted as unlikely via __builtin_expect.
#define SMALL_V7_HOT_STAT_ALLOC()                                      \
    do {                                                               \
        if (__builtin_expect(small_v7_hot_stats_enabled(), 0)) {       \
            small_v7_stat_alloc();                                     \
        }                                                              \
    } while (0)

#define SMALL_V7_HOT_STAT_FREE()                                       \
    do {                                                               \
        if (__builtin_expect(small_v7_hot_stats_enabled(), 0)) {       \
            small_v7_stat_free();                                      \
        }                                                              \
    } while (0)
|
||||
|
||||
// Class mismatch logging (for hint validation)
|
||||
static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t actual) {
|
||||
// TODO: Make this ENV-controlled
|
||||
@ -44,7 +67,7 @@ static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t
|
||||
// Alloc Fast Path
|
||||
// ============================================================================
|
||||
|
||||
// small_heap_alloc_fast_v7() - v7 alloc (C6-only for v7-2)
|
||||
// small_heap_alloc_fast_v7() - v7 alloc (C6-only, v7-5a: Hot path極限最適化)
|
||||
//
|
||||
// Flow:
|
||||
// 1. Get TLS context
|
||||
@ -53,6 +76,10 @@ static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t
|
||||
// 4. If no partial, call ColdIface refill
|
||||
// 5. Pop from freelist and return USER ptr
|
||||
//
|
||||
// v7-5a optimizations:
|
||||
// - Per-page stats (alloc_count, live_current) removed from hot path
|
||||
// - Global atomic stats gated by ENV (HAKMEM_V7_HOT_STATS)
|
||||
//
|
||||
static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
// v7-2: Only C6 is implemented
|
||||
if (unlikely(class_idx != SMALL_V7_C6_CLASS_IDX)) {
|
||||
@ -69,17 +96,13 @@ static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
p->free_list = *(void**)base;
|
||||
p->used++;
|
||||
|
||||
// Update stats
|
||||
p->alloc_count++;
|
||||
p->live_current++;
|
||||
if (p->live_current > p->peak_live) {
|
||||
p->peak_live = p->live_current;
|
||||
}
|
||||
|
||||
// Write header (HEADER_MAGIC | class_idx) for front compatibility
|
||||
// Note: Cannot move to carve time due to intrusive freelist overlapping block[0]
|
||||
((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
||||
|
||||
small_v7_stat_alloc();
|
||||
// v7-5a: Stats moved to cold path (ENV-gated only)
|
||||
SMALL_V7_HOT_STAT_ALLOC();
|
||||
|
||||
// Return USER ptr (base + 1 for header compatibility with front)
|
||||
return (uint8_t*)base + 1;
|
||||
}
|
||||
@ -96,16 +119,10 @@ static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
p->free_list = *(void**)base;
|
||||
p->used++;
|
||||
|
||||
p->alloc_count++;
|
||||
p->live_current++;
|
||||
if (p->live_current > p->peak_live) {
|
||||
p->peak_live = p->live_current;
|
||||
}
|
||||
|
||||
// Write header (HEADER_MAGIC | class_idx) for front compatibility
|
||||
// Write header
|
||||
((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
||||
|
||||
small_v7_stat_alloc();
|
||||
SMALL_V7_HOT_STAT_ALLOC();
|
||||
return (uint8_t*)base + 1;
|
||||
}
|
||||
}
|
||||
@ -124,16 +141,10 @@ static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
p->free_list = *(void**)base;
|
||||
p->used++;
|
||||
|
||||
p->alloc_count++;
|
||||
p->live_current++;
|
||||
if (p->live_current > p->peak_live) {
|
||||
p->peak_live = p->live_current;
|
||||
}
|
||||
|
||||
// Write header (HEADER_MAGIC | class_idx) for front compatibility
|
||||
// Write header
|
||||
((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
||||
|
||||
small_v7_stat_alloc();
|
||||
SMALL_V7_HOT_STAT_ALLOC();
|
||||
return (uint8_t*)base + 1;
|
||||
}
|
||||
|
||||
@ -141,12 +152,15 @@ static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
// Free Fast Path
|
||||
// ============================================================================
|
||||
|
||||
// small_heap_free_fast_v7() - v7 free (C6-only, Phase v7-3: TLS fast path)
|
||||
// small_heap_free_fast_v7() - v7 free (C6-only, v7-5a: Hot path極限最適化)
|
||||
//
|
||||
// Flow:
|
||||
// 1. Same-page cache hit (fastest)
|
||||
// 2. TLS segment hint hit (skip RegionIdBox)
|
||||
// 3. RegionIdBox fallback (cold path)
|
||||
// 1. TLS segment hint hit (skip RegionIdBox)
|
||||
// 2. RegionIdBox fallback (cold path)
|
||||
//
|
||||
// v7-5a optimizations:
|
||||
// - Stats (free_count, live_current) removed from hot path
|
||||
// - Global atomic stats gated by ENV
|
||||
//
|
||||
// @param ptr: USER pointer to free
|
||||
// @param class_idx_hint: Class index hint from front/header (may be ignored)
|
||||
@ -190,9 +204,7 @@ static inline bool small_heap_free_fast_v7(void* ptr, uint8_t class_idx_hint) {
|
||||
*(void**)base = page->free_list;
|
||||
page->free_list = base;
|
||||
|
||||
// Update stats
|
||||
page->free_count++;
|
||||
page->live_current--;
|
||||
// v7-5a: Stats removed from hot path
|
||||
|
||||
// Retire if empty
|
||||
if (unlikely(--page->used == 0)) {
|
||||
@ -200,7 +212,7 @@ static inline bool small_heap_free_fast_v7(void* ptr, uint8_t class_idx_hint) {
|
||||
small_cold_v7_retire_page(ctx, page);
|
||||
}
|
||||
|
||||
small_v7_stat_free();
|
||||
SMALL_V7_HOT_STAT_FREE();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -243,9 +255,7 @@ regionid_fallback:
|
||||
*(void**)base = page->free_list;
|
||||
page->free_list = base;
|
||||
|
||||
// Update stats
|
||||
page->free_count++;
|
||||
page->live_current--;
|
||||
// v7-5a: Stats removed from hot path
|
||||
|
||||
// Decrement used count
|
||||
if (unlikely(--page->used == 0)) {
|
||||
@ -253,7 +263,7 @@ regionid_fallback:
|
||||
small_cold_v7_retire_page(ctx, page);
|
||||
}
|
||||
|
||||
small_v7_stat_free();
|
||||
SMALL_V7_HOT_STAT_FREE();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user