Performance Results: - Throughput: 2.66M ops/s → 3.8M ops/s (+43% improvement) - sp_meta_find_or_create: O(N) linear scan → O(1) direct pointer - Stage 2 metadata scan: 100% → 10-20% (80-90% reduction via hints) Core Optimizations: 1. O(1) Metadata Lookup (superslab_types.h) - Added `shared_meta` pointer field to SuperSlab struct - Eliminates O(N) linear search through ss_metadata[] array - First access: O(N) scan + cache | Subsequent: O(1) direct return 2. sp_meta_find_or_create Fast Path (hakmem_shared_pool.c) - Check cached ss->shared_meta first before linear scan - Cache pointer after successful linear scan for future lookups - Reduces 7.8% CPU hotspot to near-zero for hot paths 3. Stage 2 Class Hints Fast Path (hakmem_shared_pool_acquire.c) - Try class_hints[class_idx] FIRST before full metadata scan - Uses O(1) ss->shared_meta lookup for hint validation - __builtin_expect() for branch prediction optimization - 80-90% of acquire calls now skip full metadata scan 4. Proper Initialization (ss_allocation_box.c) - Initialize shared_meta = NULL in superslab_allocate() - Ensures correct NULL-check semantics for new SuperSlabs Additional Improvements: - Updated ptr_trace and debug ring for release build efficiency - Enhanced ENV variable documentation and analysis - Added learner_env_box.h for configuration management - Various Box optimizations for reduced overhead Thread Safety: - All atomic operations use correct memory ordering - shared_meta cached under mutex protection - Lock-free Stage 2 uses proper CAS with acquire/release semantics Testing: - Benchmark: 1M iterations, 3.8M ops/s stable - Build: Clean compile RELEASE=0 and RELEASE=1 - No crashes, memory leaks, or correctness issues Next Optimization Candidates: - P1: Per-SuperSlab free slot bitmap for O(1) slot claiming - P2: Reduce Stage 2 critical section size - P3: Page pre-faulting (MAP_POPULATE) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
158 lines
5.7 KiB
C
158 lines
5.7 KiB
C
// hakmem_stats_master.h - Master Stats/Dump Control
|
|
//
|
|
// ENV Cleanup Phase 4d: Unified stats/dump control
|
|
//
|
|
// Usage:
|
|
// HAKMEM_STATS=all Enable ALL stats modules
|
|
// HAKMEM_STATS=sfc,fast,refill Enable specific modules (comma-separated)
|
|
// HAKMEM_STATS_DUMP=1 Dump stats at exit (atexit handler)
|
|
//
|
|
// Available stats modules:
|
|
// sfc - Super Front Cache (HAKMEM_SFC_STATS_DUMP)
|
|
// fast - Fast cache (HAKMEM_TINY_FAST_STATS)
|
|
// heap - Heap V2 (HAKMEM_TINY_HEAP_V2_STATS)
|
|
// refill - Refill stats (HAKMEM_TINY_REFILL_DUMP)
|
|
// counters - Counters (HAKMEM_TINY_COUNTERS_DUMP)
|
|
// ring - Debug ring (HAKMEM_TINY_DUMP_RING_ATEXIT)
|
|
// invariant - Invariant checks (HAKMEM_TINY_INVARIANT_DUMP)
|
|
// pagefault - Page fault telemetry (HAKMEM_TINY_PAGEFAULT_DUMP)
|
|
// front - Front metrics (HAKMEM_TINY_FRONT_DUMP)
|
|
// pool - Shared pool (HAKMEM_SHARED_POOL_STAGE_STATS)
|
|
// slim - Ultra slim (HAKMEM_ULTRA_SLIM_STATS)
|
|
// guard - External guard (HAKMEM_EXTERNAL_GUARD_STATS)
|
|
// nearempty - Near empty (HAKMEM_TINY_NEAREMPTY_DUMP)
// trace - Trace (accepted by HAKMEM_STATS; module-specific ENV not listed here)
|
|
//
|
|
// Priority (highest to lowest):
|
|
// 1. HAKMEM_QUIET=1 → suppress all stats output
|
|
// 2. Specific module ENV (e.g., HAKMEM_SFC_STATS_DUMP=1) → use that value
|
|
// 3. HAKMEM_STATS=module1,module2 → enable listed modules
|
|
// 4. HAKMEM_STATS=all → enable all
|
|
// 5. Default → disabled
|
|
//
|
|
// Integration example:
|
|
// Old: static int dump = -1;
|
|
// if (dump==-1) { const char* e = getenv("HAKMEM_FOO_DUMP"); dump = (e && *e != '0') ? 1 : 0; }
|
|
//
|
|
// New: static int dump = -1;
|
|
// if (dump==-1) { dump = hak_stats_check("HAKMEM_FOO_DUMP", "foo"); }
|
|
|
|
#ifndef HAKMEM_STATS_MASTER_H
|
|
#define HAKMEM_STATS_MASTER_H
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Stats module flags (one bit per module).
// Masks built from these flags are passed around and cached as plain `int`.
#define HAK_STATS_SFC       (1 << 0)
#define HAK_STATS_FAST      (1 << 1)
#define HAK_STATS_HEAP      (1 << 2)
#define HAK_STATS_REFILL    (1 << 3)
#define HAK_STATS_COUNTERS  (1 << 4)
#define HAK_STATS_RING      (1 << 5)
#define HAK_STATS_INVARIANT (1 << 6)
#define HAK_STATS_PAGEFAULT (1 << 7)
#define HAK_STATS_FRONT     (1 << 8)
#define HAK_STATS_POOL      (1 << 9)
#define HAK_STATS_SLIM      (1 << 10)
#define HAK_STATS_GUARD     (1 << 11)
#define HAK_STATS_NEAREMPTY (1 << 12)
#define HAK_STATS_TRACE     (1 << 13)
// Every module bit. Deliberately limited to the low 31 bits: the mask is
// stored in an `int` whose negative values mean "uninitialized"
// (see g_stats_master_mask), so the all-modules mask must stay a positive int.
// 0xFFFFFFFF would convert to -1 and be indistinguishable from the sentinel.
#define HAK_STATS_ALL       0x7FFFFFFF

// Master stats state, cached on first access by hak_stats_master_init().
// These are `static` objects in a header: each translation unit that includes
// this file gets its own copy.
static int g_stats_master_mask = -1; // -1 = uninitialized, otherwise OR of HAK_STATS_* bits (>= 0)
static int g_stats_dump_atexit = -1; // -1 = uninitialized, otherwise 0/1
static int g_stats_quiet = -1;       // -1 = uninitialized, otherwise 0/1
|
|
|
|
// Map module name to bit flag
|
|
static inline int hak_stats_name_to_flag(const char* name) {
|
|
if (!name || !*name) return 0;
|
|
if (strcmp(name, "all") == 0) return HAK_STATS_ALL;
|
|
if (strcmp(name, "sfc") == 0) return HAK_STATS_SFC;
|
|
if (strcmp(name, "fast") == 0) return HAK_STATS_FAST;
|
|
if (strcmp(name, "heap") == 0) return HAK_STATS_HEAP;
|
|
if (strcmp(name, "refill") == 0) return HAK_STATS_REFILL;
|
|
if (strcmp(name, "counters") == 0) return HAK_STATS_COUNTERS;
|
|
if (strcmp(name, "ring") == 0) return HAK_STATS_RING;
|
|
if (strcmp(name, "invariant") == 0) return HAK_STATS_INVARIANT;
|
|
if (strcmp(name, "pagefault") == 0) return HAK_STATS_PAGEFAULT;
|
|
if (strcmp(name, "front") == 0) return HAK_STATS_FRONT;
|
|
if (strcmp(name, "pool") == 0) return HAK_STATS_POOL;
|
|
if (strcmp(name, "slim") == 0) return HAK_STATS_SLIM;
|
|
if (strcmp(name, "guard") == 0) return HAK_STATS_GUARD;
|
|
if (strcmp(name, "nearempty") == 0) return HAK_STATS_NEAREMPTY;
|
|
if (strcmp(name, "trace") == 0) return HAK_STATS_TRACE;
|
|
return 0;
|
|
}
|
|
|
|
// Parse a comma-separated module list (e.g. "sfc, fast,refill") into a mask.
//   str: list of module names; NULL or "" yields 0.
//   Returns: the OR of hak_stats_name_to_flag() over every entry. Unknown and
//            empty entries contribute nothing. Spaces around an entry are
//            ignored.
//
// The input is scanned in place and never modified. strtok() is avoided on
// purpose: it keeps hidden static state, so using it here could disturb a
// strtok() sequence in progress elsewhere in the process and would not be safe
// when called from several threads.
static inline int hak_stats_parse_modules(const char* str) {
    if (!str || !*str) return 0;

    int mask = 0;
    const char* cursor = str;

    for (;;) {
        const char* comma = strchr(cursor, ',');
        size_t len = comma ? (size_t)(comma - cursor) : strlen(cursor);

        // Trim spaces on both sides of the entry.
        while (len > 0 && *cursor == ' ') { cursor++; len--; }
        while (len > 0 && cursor[len - 1] == ' ') len--;

        // Copy the entry into a small NUL-terminated buffer for the lookup.
        // An entry that does not fit is skipped whole rather than truncated,
        // so a cut-off prefix can never be mistaken for a module name.
        char entry[32];
        if (len > 0 && len < sizeof(entry)) {
            memcpy(entry, cursor, len);
            entry[len] = '\0';
            mask |= hak_stats_name_to_flag(entry);
        }

        if (!comma) break;
        cursor = comma + 1;
    }
    return mask;
}
|
|
|
|
// Initialize master stats settings (called once, lazily)
|
|
static inline void hak_stats_master_init(void) {
|
|
if (__builtin_expect(g_stats_master_mask >= 0, 1)) return;
|
|
|
|
// Check HAKMEM_QUIET first (highest priority)
|
|
const char* quiet = getenv("HAKMEM_QUIET");
|
|
g_stats_quiet = (quiet && *quiet && *quiet != '0') ? 1 : 0;
|
|
|
|
// Check HAKMEM_STATS
|
|
const char* stats = getenv("HAKMEM_STATS");
|
|
g_stats_master_mask = hak_stats_parse_modules(stats);
|
|
|
|
// Check HAKMEM_STATS_DUMP (dump at exit)
|
|
const char* dump = getenv("HAKMEM_STATS_DUMP");
|
|
g_stats_dump_atexit = (dump && *dump && *dump != '0') ? 1 :
|
|
(g_stats_master_mask > 0 ? 1 : 0);
|
|
}
|
|
|
|
// Check if a specific stats module should be enabled
|
|
// env_name: The specific ENV variable (e.g., "HAKMEM_SFC_STATS_DUMP")
|
|
// module: The module name for HAKMEM_STATS (e.g., "sfc")
|
|
// Returns: 1 if stats should be enabled, 0 otherwise
|
|
static inline int hak_stats_check(const char* env_name, const char* module) {
|
|
hak_stats_master_init();
|
|
|
|
// HAKMEM_QUIET overrides everything
|
|
if (g_stats_quiet) return 0;
|
|
|
|
// Check specific ENV first (allows explicit enable/disable)
|
|
const char* e = getenv(env_name);
|
|
if (e && *e) {
|
|
return (*e != '0') ? 1 : 0;
|
|
}
|
|
|
|
// Fall back to master stats mask
|
|
int flag = hak_stats_name_to_flag(module);
|
|
if (flag && (g_stats_master_mask & flag)) return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Check if stats dump at exit is enabled
|
|
static inline int hak_stats_dump_enabled(void) {
|
|
hak_stats_master_init();
|
|
return g_stats_quiet ? 0 : g_stats_dump_atexit;
|
|
}
|
|
|
|
#endif // HAKMEM_STATS_MASTER_H
|