Implement C4 ULTRA free TLS cache with parasitic free+alloc pattern, achieving 99.7-99.9% elimination of C4 legacy fallback calls.

Key Features:
- TLS cache cap=64 (tuned for L1 cache fit, smaller than C5/C6's 128)
- Segment learning via ss_fast_lookup() on first free
- Free-side cache push + alloc-side TLS pop pattern
- ENV gate: HAKMEM_TINY_C4_ULTRA_FREE_ENABLED (default OFF)
- Full FREE_PATH_STATS instrumentation

Benchmark Results:
C4-heavy (65-128B range):
- C4 legacy: 591,583 → 1,711 (-99.7%)
- c4_ultra cache hits: ~599k (free) + ~599k (alloc)
- Mixed load: 340,732 → 284 C4 legacy (-99.9%)

Legacy fallback reduction:
- C4-heavy: 589,872 fewer legacy calls (-10.9% total)
- Mixed: 340,448 fewer C4 legacy calls (-12.8% in mixed)

Performance note: ~2% throughput cost in isolated C4-heavy case, acceptable tradeoff for 99%+ legacy elimination per class.

Files:
- NEW: core/box/tiny_c4_ultra_free_box.h/c
- NEW: core/box/tiny_c4_ultra_free_env_box.h
- MOD: core/box/tiny_ultra_classes_box.h (added C4 macros)
- MOD: core/box/free_path_stats_box.h/c (C4 ULTRA counters)
- MOD: core/front/malloc_tiny_fast.h (C4 alloc+free integration)
- MOD: Makefile (added C4 ULTRA object)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
50 lines
1.5 KiB
C
50 lines
1.5 KiB
C
#ifndef HAKMEM_FREE_PATH_STATS_BOX_H
|
|
#define HAKMEM_FREE_PATH_STATS_BOX_H
|
|
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
|
|
/*
 * FreePathStats - block of 64-bit event counters for the free path.
 *
 * Fields are incremented through FREE_PATH_STAT_INC(field). Field order is
 * kept stable so the in-memory layout does not change.
 * NOTE(review): the per-field meanings below are taken from the existing
 * phase comments and the field names; confirm against the call sites.
 */
typedef struct FreePathStats {
    uint64_t total_calls;

    uint64_t c7_ultra_fast;
    uint64_t c6_ultra_free_fast;    // Phase 4-2: C6 ULTRA-free
    uint64_t c6_ultra_alloc_hit;    // Phase 4-4: C6 ULTRA-alloc (TLS pop)
    uint64_t c5_ultra_free_fast;    // Phase 5-1: C5 ULTRA-free
    uint64_t c5_ultra_alloc_hit;    // Phase 5-2: C5 ULTRA-alloc (TLS pop)
    uint64_t c4_ultra_free_fast;    // Phase 6: C4 ULTRA-free (cap=64)
    uint64_t c4_ultra_alloc_hit;    // Phase 6: C4 ULTRA-alloc (TLS pop)
    uint64_t smallheap_v3_fast;
    uint64_t smallheap_v6_fast;
    uint64_t tiny_heap_v1_fast;
    uint64_t pool_v1_fast;
    uint64_t remote_free;
    uint64_t super_lookup_called;
    uint64_t legacy_fallback;

    // Phase 4-1: Legacy per-class breakdown
    uint64_t legacy_by_class[8];    // Legacy fallback breakdown for C0-C7
} FreePathStats;
|
|
|
|
/*
 * ENV gate: report whether free-path statistics collection is enabled.
 *
 * The HAKMEM_FREE_PATH_STATS environment variable is read on the first call
 * and the answer is cached in a function-local static, so later calls skip
 * getenv(). The gate is on when the variable is set, non-empty, and its first
 * character is not '0'; in every other case it is off.
 *
 * Because the function is `static inline`, each translation unit that
 * includes this header carries its own copy of the cached flag.
 *
 * Returns true when statistics should be recorded, false otherwise.
 */
static inline bool free_path_stats_enabled(void) {
    static int g_enabled = -1;  // -1 = not resolved yet, 0 = off, 1 = on
    if (__builtin_expect(g_enabled == -1, 0)) {
        const char *e = getenv("HAKMEM_FREE_PATH_STATS");
        g_enabled = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_enabled != 0;
}
|
|
|
|
// Global stats instance
|
|
extern FreePathStats g_free_path_stats;
|
|
|
|
/*
 * FREE_PATH_STAT_INC(field) - increment one counter of g_free_path_stats.
 *
 * `field` is pasted after `g_free_path_stats.`, so it must be a member
 * designator of FreePathStats (an indexed member such as legacy_by_class[i]
 * also works). The increment only happens when free_path_stats_enabled()
 * returns true; that branch is hinted as unlikely via __builtin_expect.
 *
 * Wrapped in do { ... } while(0) so the macro behaves as a single statement
 * and is safe inside an unbraced if/else.
 */
#define FREE_PATH_STAT_INC(field) \
    do { if (__builtin_expect(free_path_stats_enabled(), 0)) { \
        g_free_path_stats.field++; \
    } } while(0)
|
|
|
|
#endif // HAKMEM_FREE_PATH_STATS_BOX_H
|