Add Page Box layer for C7 class optimization
- Implement tiny_page_box.c/h: per-thread page cache between UC and Shared Pool
- Integrate Page Box into Unified Cache refill path
- Remove legacy SuperSlab implementation (merged into smallmid)
- Add HAKMEM_TINY_PAGE_BOX_CLASSES env var for selective class enabling
- Update bench_random_mixed.c with Page Box statistics

Current status: the implementation is safe, with no regressions. Page Box ON/OFF shows minimal difference — the pool strategy needs tuning.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -24,12 +24,22 @@ _Atomic uint64_t g_sp_stage2_lock_acquired_global = 0;
|
||||
_Atomic uint64_t g_sp_stage3_lock_acquired_global = 0;
|
||||
_Atomic uint64_t g_sp_alloc_lock_contention_global = 0;
|
||||
|
||||
// Per-class lock acquisition statistics(Tiny クラス別の lock 負荷観測用)
|
||||
_Atomic uint64_t g_sp_stage2_lock_acquired_by_class[TINY_NUM_CLASSES_SS] = {0};
|
||||
_Atomic uint64_t g_sp_stage3_lock_acquired_by_class[TINY_NUM_CLASSES_SS] = {0};
|
||||
|
||||
// Check whether Shared Pool lock measurement is enabled (cached).
//
// Reads HAKMEM_MEASURE_UNIFIED_CACHE once and caches the result; any
// non-empty value other than "0" enables measurement.  As a side effect,
// enabling measurement also turns on the per-class stage statistics.
//
// The cached flag is read from multiple threads (every Stage2/Stage3
// lock path calls this, per the surrounding counters), so the lazy
// initialization uses an _Atomic to avoid a data race.  Relaxed ordering
// is sufficient: the value is monotonic (-1 -> 0/1) and the worst case
// is that a racing thread re-reads the environment once.
static inline int sp_measure_enabled(void) {
    static _Atomic int g_measure = -1;  // -1 = not yet initialized
    int measure = atomic_load_explicit(&g_measure, memory_order_relaxed);
    if (__builtin_expect(measure == -1, 0)) {
        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
        measure = (e && *e && *e != '0') ? 1 : 0;
        if (measure == 1) {
            // When measurement is ON, also enable per-class stage stats
            // (Stage1/2/3 hit counts accumulate into g_sp_stage*_hits).
            extern int g_sp_stage_stats_enabled;
            g_sp_stage_stats_enabled = 1;
        }
        atomic_store_explicit(&g_measure, measure, memory_order_relaxed);
    }
    return measure;
}
|
||||
@ -319,8 +329,13 @@ stage2_fallback:
|
||||
|
||||
// Performance measurement: count Stage 2 lock acquisitions
|
||||
if (__builtin_expect(sp_measure_enabled(), 0)) {
|
||||
atomic_fetch_add_explicit(&g_sp_stage2_lock_acquired_global, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_alloc_lock_contention_global, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_stage2_lock_acquired_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_alloc_lock_contention_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(
|
||||
&g_sp_stage2_lock_acquired_by_class[class_idx],
|
||||
1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Update SuperSlab metadata under mutex
|
||||
@ -408,8 +423,13 @@ stage2_scan:
|
||||
|
||||
// Performance measurement: count Stage 2 scan lock acquisitions
|
||||
if (__builtin_expect(sp_measure_enabled(), 0)) {
|
||||
atomic_fetch_add_explicit(&g_sp_stage2_lock_acquired_global, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_alloc_lock_contention_global, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_stage2_lock_acquired_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_alloc_lock_contention_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(
|
||||
&g_sp_stage2_lock_acquired_by_class[class_idx],
|
||||
1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Update SuperSlab metadata under mutex
|
||||
@ -486,8 +506,12 @@ stage2_scan:
|
||||
|
||||
// Performance measurement: count Stage 3 lock acquisitions
|
||||
if (__builtin_expect(sp_measure_enabled(), 0)) {
|
||||
atomic_fetch_add_explicit(&g_sp_stage3_lock_acquired_global, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_alloc_lock_contention_global, 1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_stage3_lock_acquired_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_alloc_lock_contention_global,
|
||||
1, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit(&g_sp_stage3_lock_acquired_by_class[class_idx],
|
||||
1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
// ========== Stage 3: Get new SuperSlab ==========
|
||||
@ -619,9 +643,12 @@ void shared_pool_print_measurements(void) {
|
||||
return; // Measurement disabled
|
||||
}
|
||||
|
||||
uint64_t stage2 = atomic_load_explicit(&g_sp_stage2_lock_acquired_global, memory_order_relaxed);
|
||||
uint64_t stage3 = atomic_load_explicit(&g_sp_stage3_lock_acquired_global, memory_order_relaxed);
|
||||
uint64_t total_locks = atomic_load_explicit(&g_sp_alloc_lock_contention_global, memory_order_relaxed);
|
||||
uint64_t stage2 = atomic_load_explicit(&g_sp_stage2_lock_acquired_global,
|
||||
memory_order_relaxed);
|
||||
uint64_t stage3 = atomic_load_explicit(&g_sp_stage3_lock_acquired_global,
|
||||
memory_order_relaxed);
|
||||
uint64_t total_locks = atomic_load_explicit(&g_sp_alloc_lock_contention_global,
|
||||
memory_order_relaxed);
|
||||
|
||||
if (total_locks == 0) {
|
||||
fprintf(stderr, "\n========================================\n");
|
||||
@ -644,5 +671,27 @@ void shared_pool_print_measurements(void) {
|
||||
(unsigned long long)stage3, stage3_pct);
|
||||
fprintf(stderr, "Total Contention: %llu lock acquisitions\n",
|
||||
(unsigned long long)total_locks);
|
||||
|
||||
// Per-class breakdown(Tiny 用クラス 0-7、特に C5–C7 を観測)
|
||||
fprintf(stderr, "\nPer-class Shared Pool Locks (Stage2/Stage3):\n");
|
||||
for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
|
||||
uint64_t s2c = atomic_load_explicit(
|
||||
&g_sp_stage2_lock_acquired_by_class[cls],
|
||||
memory_order_relaxed);
|
||||
uint64_t s3c = atomic_load_explicit(
|
||||
&g_sp_stage3_lock_acquired_by_class[cls],
|
||||
memory_order_relaxed);
|
||||
uint64_t tc = s2c + s3c;
|
||||
if (tc == 0) {
|
||||
continue; // ロック取得のないクラスは省略
|
||||
}
|
||||
fprintf(stderr,
|
||||
" C%d: Stage2=%llu Stage3=%llu Total=%llu\n",
|
||||
cls,
|
||||
(unsigned long long)s2c,
|
||||
(unsigned long long)s3c,
|
||||
(unsigned long long)tc);
|
||||
}
|
||||
|
||||
fprintf(stderr, "========================================\n\n");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user