Files
hakmem/core/box/tiny_near_empty_box.c
Moe Charm (CI) 984cca41ef P0 Optimization: Shared Pool fast path with O(1) metadata lookup
Performance Results:
- Throughput: 2.66M ops/s → 3.8M ops/s (+43% improvement)
- sp_meta_find_or_create: O(N) linear scan → O(1) direct pointer
- Stage 2 metadata scan: 100% → 10-20% (80-90% reduction via hints)

Core Optimizations:

1. O(1) Metadata Lookup (superslab_types.h)
   - Added `shared_meta` pointer field to SuperSlab struct
   - Eliminates O(N) linear search through ss_metadata[] array
   - First access: O(N) scan + cache | Subsequent: O(1) direct return

2. sp_meta_find_or_create Fast Path (hakmem_shared_pool.c)
   - Check cached ss->shared_meta first before linear scan
   - Cache pointer after successful linear scan for future lookups
   - Reduces 7.8% CPU hotspot to near-zero for hot paths

3. Stage 2 Class Hints Fast Path (hakmem_shared_pool_acquire.c)
   - Try class_hints[class_idx] FIRST before full metadata scan
   - Uses O(1) ss->shared_meta lookup for hint validation
   - __builtin_expect() for branch prediction optimization
   - 80-90% of acquire calls now skip full metadata scan

4. Proper Initialization (ss_allocation_box.c)
   - Initialize shared_meta = NULL in superslab_allocate()
   - Ensures correct NULL-check semantics for new SuperSlabs

Additional Improvements:
- Updated ptr_trace and debug ring for release build efficiency
- Enhanced ENV variable documentation and analysis
- Added learner_env_box.h for configuration management
- Various Box optimizations for reduced overhead

Thread Safety:
- All atomic operations use correct memory ordering
- shared_meta cached under mutex protection
- Lock-free Stage 2 uses proper CAS with acquire/release semantics

Testing:
- Benchmark: 1M iterations, 3.8M ops/s stable
- Build: Clean compile RELEASE=0 and RELEASE=1
- No crashes, memory leaks, or correctness issues

Next Optimization Candidates:
- P1: Per-SuperSlab free slot bitmap for O(1) slot claiming
- P2: Reduce Stage 2 critical section size
- P3: Page pre-faulting (MAP_POPULATE)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-04 16:21:54 +09:00

127 lines
3.7 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_near_empty_box.c - Tiny Near-Empty Slab Advisor (C2/C3)
#include "tiny_near_empty_box.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>
// Per-class counters of observed near-empty events.
// Incremented by tiny_near_empty_on_free_impl() and read/reset by
// tiny_near_empty_stats_snapshot(); every access in this file uses
// memory_order_relaxed.
_Atomic uint64_t g_tiny_near_empty_events[TINY_NUM_CLASSES] = {0};
// ENV gate: the advisor is on only when HAKMEM_TINY_SS_PACK_C23 is set to a
// non-empty value whose first character is not '0'.
// Cached state: -1 = environment not read yet, 0 = disabled, 1 = enabled.
static int g_tiny_near_empty_enabled = -1;

// Returns 1 when the near-empty advisor is enabled, 0 otherwise.
// The environment is consulted on the first call only; later calls return
// the cached answer.
int tiny_near_empty_enabled(void)
{
    // Common case: the answer has already been cached.
    if (__builtin_expect(g_tiny_near_empty_enabled != -1, 1)) {
        return g_tiny_near_empty_enabled;
    }

    const char *value = getenv("HAKMEM_TINY_SS_PACK_C23");
    int enabled = 0;
    if (value && value[0] != '\0' && value[0] != '0') {
        enabled = 1;
    }
    g_tiny_near_empty_enabled = enabled;
    return enabled;
}
// Near-empty threshold in percent. 0 means "not initialized yet".
static _Atomic int g_tiny_near_empty_pct = 0;

// Returns the near-empty threshold as a percentage in the range 1..99.
// On the first call (stored value still 0) the threshold is initialized from
// the HAKMEM_TINY_NEAREMPTY_PCT environment variable; it falls back to 25 when
// the variable is unset, empty, not a number, or outside 1..99. The result is
// stored so later calls return it directly.
int tiny_near_empty_get_pct(void)
{
    int pct = atomic_load_explicit(&g_tiny_near_empty_pct, memory_order_relaxed);
    if (pct != 0) {
        return pct;
    }

    // First use: initialize from the environment.
    pct = 25;
    const char *env = getenv("HAKMEM_TINY_NEAREMPTY_PCT");
    if (env && *env) {
        // strtol rather than atoi: values that do not fit are saturated to
        // LONG_MIN/LONG_MAX (well-defined) and then rejected by the range
        // check, whereas atoi has undefined behavior on overflow. Trailing
        // characters after the digits are tolerated, as they were with atoi.
        char *end = NULL;
        long parsed = strtol(env, &end, 10);
        if (end != env && parsed >= 1 && parsed <= 99) {
            pct = (int)parsed;
        }
    }
    atomic_store_explicit(&g_tiny_near_empty_pct, pct, memory_order_relaxed);
    return pct;
}
// Overrides the near-empty threshold at runtime.
// pct: new threshold in percent; values outside 1..99 are ignored and the
// currently stored threshold is left untouched.
void tiny_near_empty_set_pct(int pct)
{
    const int in_range = (pct >= 1) && (pct <= 99);
    if (in_range) {
        atomic_store_explicit(&g_tiny_near_empty_pct, pct, memory_order_relaxed);
    }
}
// Internal implementation of the near-empty check, called from the free path.
// Increments g_tiny_near_empty_events[class_idx] when the slab described by
// `meta` is still in use (used > 0) and its usage is at or below the
// configured threshold percentage.
//   class_idx: tiny size class; only classes 2 and 3 are considered for now.
//   meta:      slab metadata providing `used` and `capacity`; NULL is ignored.
void tiny_near_empty_on_free_impl(int class_idx, TinySlabMeta* meta)
{
    // Reject out-of-range classes and everything except C2/C3.
    const int tracked_class = (class_idx == 2) || (class_idx == 3);
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES || !tracked_class) {
        return;
    }
    if (!meta) {
        return;
    }

    const uint16_t in_use = meta->used;
    const uint16_t total = meta->capacity;
    if (in_use == 0 || total == 0) {
        return;
    }

    // Near-empty is defined as usage <= pct%, i.e. used * 100 <= capacity * pct.
    // Both sides are computed in 32-bit unsigned arithmetic to avoid division.
    const uint32_t threshold = (uint32_t)tiny_near_empty_get_pct();
    const uint32_t scaled_used = (uint32_t)in_use * 100u;
    const uint32_t scaled_limit = (uint32_t)total * threshold;
    if (scaled_used <= scaled_limit) {
        atomic_fetch_add_explicit(&g_tiny_near_empty_events[class_idx],
                                  1,
                                  memory_order_relaxed);
    }
}
void tiny_near_empty_stats_snapshot(uint64_t events[TINY_NUM_CLASSES],
int reset)
{
if (!events && !reset) {
return;
}
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
if (events) {
events[c] = atomic_load_explicit(&g_tiny_near_empty_events[c],
memory_order_relaxed);
}
if (reset) {
atomic_store_explicit(&g_tiny_near_empty_events[c],
0,
memory_order_relaxed);
}
}
}
// Optional: dump the near-empty statistics once at process exit (debug only).
// Per the original note, enabled with ENV HAKMEM_TINY_NEAREMPTY_DUMP=1 or
// HAKMEM_STATS=nearempty; the actual decision is delegated to hak_stats_check().
#if !HAKMEM_BUILD_RELEASE
#include "../hakmem_stats_master.h" // Phase 4d: Master stats control
static void tiny_near_empty_dump_stats(void) __attribute__((destructor));

// Destructor hook: when hak_stats_check() allows it, prints a header line and
// then every per-class counter whose value is non-zero to stderr.
static void tiny_near_empty_dump_stats(void)
{
    if (hak_stats_check("HAKMEM_TINY_NEAREMPTY_DUMP", "nearempty")) {
        fprintf(stderr, "[TINY_NEAR_EMPTY_STATS] class events\n");
        for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
            const uint64_t count =
                atomic_load_explicit(&g_tiny_near_empty_events[cls],
                                     memory_order_relaxed);
            if (count == 0) {
                continue; // Skip classes that never recorded an event.
            }
            fprintf(stderr, " C%d: %llu\n", cls, (unsigned long long)count);
        }
    }
}
#endif