Files
hakmem/core/hakmem_stats_master.h

158 lines
5.7 KiB
C
Raw Normal View History

// hakmem_stats_master.h - Master Stats/Dump Control
//
// ENV Cleanup Phase 4d: Unified stats/dump control
//
// Usage:
// HAKMEM_STATS=all Enable ALL stats modules
// HAKMEM_STATS=sfc,fast,refill Enable specific modules (comma-separated)
// HAKMEM_STATS_DUMP=1 Dump stats at exit (atexit handler)
//
// Available stats modules:
// sfc - Super Front Cache (HAKMEM_SFC_STATS_DUMP)
// fast - Fast cache (HAKMEM_TINY_FAST_STATS)
// heap - Heap V2 (HAKMEM_TINY_HEAP_V2_STATS)
// refill - Refill stats (HAKMEM_TINY_REFILL_DUMP)
// counters - Counters (HAKMEM_TINY_COUNTERS_DUMP)
// ring - Debug ring (HAKMEM_TINY_DUMP_RING_ATEXIT)
// invariant - Invariant checks (HAKMEM_TINY_INVARIANT_DUMP)
// pagefault - Page fault telemetry (HAKMEM_TINY_PAGEFAULT_DUMP)
// front - Front metrics (HAKMEM_TINY_FRONT_DUMP)
// pool - Shared pool (HAKMEM_SHARED_POOL_STAGE_STATS)
// slim - Ultra slim (HAKMEM_ULTRA_SLIM_STATS)
// guard - External guard (HAKMEM_EXTERNAL_GUARD_STATS)
// nearempty - Near empty (HAKMEM_TINY_NEAREMPTY_DUMP)
// trace - Trace (HAK_STATS_TRACE; module-specific ENV name not listed in this header)
//
// Priority (highest to lowest):
// 1. HAKMEM_QUIET=1 → suppress all stats output
// 2. Specific module ENV (e.g., HAKMEM_SFC_STATS_DUMP=1) → use that value
// 3. HAKMEM_STATS=module1,module2 → enable listed modules
// 4. HAKMEM_STATS=all → enable all
// 5. Default → disabled
//
// Integration example:
// Old: static int dump = -1;
// if (dump==-1) { const char* e = getenv("HAKMEM_FOO_DUMP"); dump = (e && *e != '0') ? 1 : 0; }
//
// New: static int dump = -1;
// if (dump==-1) { dump = hak_stats_check("HAKMEM_FOO_DUMP", "foo"); }
#ifndef HAKMEM_STATS_MASTER_H
#define HAKMEM_STATS_MASTER_H
#include <stdlib.h>
#include <string.h>
// Stats module flags: one bit per module, OR-ed together into the master mask.
#define HAK_STATS_SFC (1 << 0)
#define HAK_STATS_FAST (1 << 1)
#define HAK_STATS_HEAP (1 << 2)
#define HAK_STATS_REFILL (1 << 3)
#define HAK_STATS_COUNTERS (1 << 4)
#define HAK_STATS_RING (1 << 5)
#define HAK_STATS_INVARIANT (1 << 6)
#define HAK_STATS_PAGEFAULT (1 << 7)
#define HAK_STATS_FRONT (1 << 8)
#define HAK_STATS_POOL (1 << 9)
#define HAK_STATS_SLIM (1 << 10)
#define HAK_STATS_GUARD (1 << 11)
#define HAK_STATS_NEAREMPTY (1 << 12)
#define HAK_STATS_TRACE (1 << 13)
// Mask selecting every module. It deliberately leaves the sign bit clear:
// the mask is stored in an `int` whose value -1 means "uninitialized", and
// 0xFFFFFFFF would convert to exactly that sentinel.
#define HAK_STATS_ALL 0x7FFFFFFF
// Cached master stats state. Every value starts at -1 and is filled in by
// hak_stats_master_init() on first use.

// Bitmask of enabled HAK_STATS_* modules; -1 means "not initialized yet".
static int g_stats_master_mask = -1;

// 1 when stats should be dumped at exit, 0 otherwise.
static int g_stats_dump_atexit = -1;

// 1 when HAKMEM_QUIET suppresses all stats output, 0 otherwise.
static int g_stats_quiet = -1;
// Map module name to bit flag
static inline int hak_stats_name_to_flag(const char* name) {
if (!name || !*name) return 0;
if (strcmp(name, "all") == 0) return HAK_STATS_ALL;
if (strcmp(name, "sfc") == 0) return HAK_STATS_SFC;
if (strcmp(name, "fast") == 0) return HAK_STATS_FAST;
if (strcmp(name, "heap") == 0) return HAK_STATS_HEAP;
if (strcmp(name, "refill") == 0) return HAK_STATS_REFILL;
if (strcmp(name, "counters") == 0) return HAK_STATS_COUNTERS;
if (strcmp(name, "ring") == 0) return HAK_STATS_RING;
if (strcmp(name, "invariant") == 0) return HAK_STATS_INVARIANT;
if (strcmp(name, "pagefault") == 0) return HAK_STATS_PAGEFAULT;
if (strcmp(name, "front") == 0) return HAK_STATS_FRONT;
if (strcmp(name, "pool") == 0) return HAK_STATS_POOL;
if (strcmp(name, "slim") == 0) return HAK_STATS_SLIM;
if (strcmp(name, "guard") == 0) return HAK_STATS_GUARD;
if (strcmp(name, "nearempty") == 0) return HAK_STATS_NEAREMPTY;
P0 Optimization: Shared Pool fast path with O(1) metadata lookup Performance Results: - Throughput: 2.66M ops/s → 3.8M ops/s (+43% improvement) - sp_meta_find_or_create: O(N) linear scan → O(1) direct pointer - Stage 2 metadata scan: 100% → 10-20% (80-90% reduction via hints) Core Optimizations: 1. O(1) Metadata Lookup (superslab_types.h) - Added `shared_meta` pointer field to SuperSlab struct - Eliminates O(N) linear search through ss_metadata[] array - First access: O(N) scan + cache | Subsequent: O(1) direct return 2. sp_meta_find_or_create Fast Path (hakmem_shared_pool.c) - Check cached ss->shared_meta first before linear scan - Cache pointer after successful linear scan for future lookups - Reduces 7.8% CPU hotspot to near-zero for hot paths 3. Stage 2 Class Hints Fast Path (hakmem_shared_pool_acquire.c) - Try class_hints[class_idx] FIRST before full metadata scan - Uses O(1) ss->shared_meta lookup for hint validation - __builtin_expect() for branch prediction optimization - 80-90% of acquire calls now skip full metadata scan 4. 
Proper Initialization (ss_allocation_box.c) - Initialize shared_meta = NULL in superslab_allocate() - Ensures correct NULL-check semantics for new SuperSlabs Additional Improvements: - Updated ptr_trace and debug ring for release build efficiency - Enhanced ENV variable documentation and analysis - Added learner_env_box.h for configuration management - Various Box optimizations for reduced overhead Thread Safety: - All atomic operations use correct memory ordering - shared_meta cached under mutex protection - Lock-free Stage 2 uses proper CAS with acquire/release semantics Testing: - Benchmark: 1M iterations, 3.8M ops/s stable - Build: Clean compile RELEASE=0 and RELEASE=1 - No crashes, memory leaks, or correctness issues Next Optimization Candidates: - P1: Per-SuperSlab free slot bitmap for O(1) slot claiming - P2: Reduce Stage 2 critical section size - P3: Page pre-faulting (MAP_POPULATE) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-04 16:21:54 +09:00
if (strcmp(name, "trace") == 0) return HAK_STATS_TRACE;
return 0;
}
// Parse a comma-separated module list into a HAK_STATS_* bitmask.
//
// str: value of HAKMEM_STATS, e.g. "sfc, fast,refill". May be NULL.
// Returns: OR of the flags of every recognized module; 0 for NULL/empty input.
//
// Spaces around each name are ignored, empty entries are skipped and unknown
// names contribute nothing. The input is scanned in place without strtok(),
// so no hidden global tokenizer state is touched and the list length is not
// capped by a fixed-size copy of the whole string.
static inline int hak_stats_parse_modules(const char* str) {
    if (!str || !*str) return 0;

    int mask = 0;
    const char* cursor = str;
    for (;;) {
        // Locate the end of the current comma-delimited entry.
        const char* stop = cursor;
        while (*stop && *stop != ',') stop++;

        // Trim surrounding spaces.
        const char* first = cursor;
        const char* last = stop;
        while (first < last && *first == ' ') first++;
        while (last > first && last[-1] == ' ') last--;

        // Copy the entry so it can be passed as a NUL-terminated name.
        // Entries that do not fit cannot match any known module name.
        char name[32];
        size_t len = (size_t)(last - first);
        if (len > 0 && len < sizeof(name)) {
            memcpy(name, first, len);
            name[len] = '\0';
            mask |= hak_stats_name_to_flag(name);
        }

        if (!*stop) break;
        cursor = stop + 1;
    }
    return mask;
}
// Initialize master stats settings (lazy; returns immediately once done).
//
// Reads HAKMEM_QUIET, HAKMEM_STATS and HAKMEM_STATS_DUMP and caches the
// results in g_stats_quiet, g_stats_master_mask and g_stats_dump_atexit.
static inline void hak_stats_master_init(void) {
    if (__builtin_expect(g_stats_master_mask >= 0, 1)) return;

    // HAKMEM_QUIET: any non-empty value not starting with '0' means quiet.
    const char* quiet = getenv("HAKMEM_QUIET");
    int quiet_on = (quiet && *quiet && *quiet != '0') ? 1 : 0;

    // HAKMEM_STATS: list of enabled modules.
    const char* stats = getenv("HAKMEM_STATS");
    int mask = hak_stats_parse_modules(stats);
    // A mask with the sign bit set (e.g. "all" yielding -1) would look like
    // the "uninitialized" sentinel and force a re-parse on every call.
    // Clear the sign bit; all module bits are preserved.
    if (mask < 0) mask &= (int)(~0u >> 1);

    // HAKMEM_STATS_DUMP: explicit request to dump at exit. Otherwise dumping
    // defaults to on whenever at least one module is enabled. Note that
    // HAKMEM_STATS_DUMP=0 does not turn this default off.
    const char* dump = getenv("HAKMEM_STATS_DUMP");
    int dump_on = (dump && *dump && *dump != '0') ? 1 : (mask != 0 ? 1 : 0);

    // Store the mask last: it doubles as the "initialized" marker checked at
    // the top of this function, so the other fields are written before it.
    g_stats_quiet = quiet_on;
    g_stats_dump_atexit = dump_on;
    g_stats_master_mask = mask;
}
// Check whether a specific stats module should be enabled.
//
// env_name: the module-specific ENV variable (e.g. "HAKMEM_SFC_STATS_DUMP").
//           May be NULL, in which case only the master mask is consulted.
// module:   the module name used in HAKMEM_STATS (e.g. "sfc").
// Returns:  1 if stats should be enabled, 0 otherwise.
//
// Order of precedence: HAKMEM_QUIET, then the module-specific variable when
// it is set to a non-empty value, then the HAKMEM_STATS master mask.
static inline int hak_stats_check(const char* env_name, const char* module) {
    hak_stats_master_init();

    // HAKMEM_QUIET overrides everything.
    if (g_stats_quiet) return 0;

    // A non-empty module-specific variable decides on its own, which allows
    // both explicit enable and explicit disable ("0...").
    if (env_name) {
        const char* e = getenv(env_name);
        if (e && *e) return (*e != '0') ? 1 : 0;
    }

    // Otherwise fall back to the master mask.
    int flag = hak_stats_name_to_flag(module);
    return (flag && (g_stats_master_mask & flag)) ? 1 : 0;
}
// Report whether stats should be dumped at exit.
// Returns 0 when HAKMEM_QUIET is in effect, otherwise the cached
// dump-at-exit setting computed by hak_stats_master_init().
static inline int hak_stats_dump_enabled(void) {
    hak_stats_master_init();
    if (g_stats_quiet) {
        return 0;
    }
    return g_stats_dump_atexit;
}
#endif // HAKMEM_STATS_MASTER_H