/*
 * History note (from merge commit): the separate g_tls_sll_head[] and
 * g_tls_sll_count[] arrays were merged into a unified TinyTLSSLL struct
 * (16B aligned, head + count + pad) to improve L1D cache locality —
 * one load per operation instead of two. All call sites were updated:
 * g_tls_sll_head[i] -> g_tls_sll[i].head, g_tls_sll_count[i] -> g_tls_sll[i].count.
 * See core/hakmem_tiny.h (TinyTLSSLL type) and core/box/tls_sll_box.h (Box API).
 */
// tiny_adaptive_sizing.h - Phase 2b: TLS Cache Adaptive Sizing
|
|
// Purpose: Hot classes get more cache → Better hit rate → Higher throughput
|
|
// Design: Track high-water mark, adapt capacity based on usage ratio
|
|
// Expected: +3-10% performance, -30-50% TLS cache memory overhead
|
|
|
|
#pragma once
|
|
#include "hakmem_tiny.h"
|
|
#include <stdint.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
|
|
// ========== Configuration ==========
|
|
|
|
// Capacity bounds
|
|
// Phase 10: Aggressive adaptive sizing - maximize front cache utilization
|
|
#define TLS_CACHE_MIN_CAPACITY 32 // Minimum cache size (2x increase)
|
|
#define TLS_CACHE_MAX_CAPACITY 4096 // Maximum cache size (2x increase)
|
|
#define TLS_CACHE_INITIAL_CAPACITY 256 // Initial size (4x increase from 64)
|
|
|
|
// Adaptation triggers
|
|
// Phase 10: More frequent adaptation to respond quickly to workload changes
|
|
#define ADAPT_REFILL_THRESHOLD 5 // Adapt every 5 refills (was 10)
|
|
#define ADAPT_TIME_THRESHOLD_NS (500000000ULL) // Or every 0.5 seconds (was 1s)
|
|
|
|
// Growth/shrink thresholds
|
|
// Phase 10: Aggressive growth, conservative shrinkage
|
|
#define GROW_THRESHOLD 0.7 // Grow if usage > 70% of capacity (was 80%)
|
|
#define SHRINK_THRESHOLD 0.1 // Shrink if usage < 10% of capacity (was 20%)
|
|
|
|
// ========== Data Structures ==========
|
|
|
|
// Per-class TLS cache statistics.
// One instance per tiny size class, thread-local (see g_tls_cache_stats
// below). Drives the grow/shrink decisions in adapt_tls_cache_size().
typedef struct TLSCacheStats {
    size_t capacity;          // Current capacity (bounded by TLS_CACHE_MIN/MAX_CAPACITY)
    size_t high_water_mark;   // Peak usage in recent window (reset by adaptation)
    size_t refill_count;      // Refills since last adapt (trigger: ADAPT_REFILL_THRESHOLD)
    size_t shrink_count;      // Shrinks (for debugging/stats only)
    size_t grow_count;        // Grows (for debugging/stats only)
    uint64_t last_adapt_time; // Timestamp of last adaptation (ns, CLOCK_MONOTONIC)
} TLSCacheStats;
|
|
|
|
// TLS per-thread stats (defined in hakmem_tiny.c)
|
|
extern __thread TLSCacheStats g_tls_cache_stats[TINY_NUM_CLASSES];
|
|
|
|
// TLS cache variables (defined in hakmem_tiny.c)
|
|
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
|
|
|
// Global enable flag (runtime toggle via HAKMEM_ADAPTIVE_SIZING=1)
|
|
extern int g_adaptive_sizing_enabled;
|
|
|
|
// ========== Helper Functions ==========
|
|
|
|
// Return the current CLOCK_MONOTONIC time collapsed into a single
// nanosecond count (used to rate-limit adaptation via ADAPT_TIME_THRESHOLD_NS).
static inline uint64_t get_timestamp_ns(void) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    uint64_t ns = (uint64_t)now.tv_sec * 1000000000ULL;
    ns += (uint64_t)now.tv_nsec;
    return ns;
}
|
|
|
|
// ========== Core API ==========
|
|
|
|
// Initialize adaptive sizing stats (called from hak_tiny_init)
|
|
void adaptive_sizing_init(void);
|
|
|
|
// Grow TLS cache capacity (2x)
|
|
void grow_tls_cache(int class_idx);
|
|
|
|
// Shrink TLS cache capacity (0.5x)
|
|
void shrink_tls_cache(int class_idx);
|
|
|
|
// Drain excess blocks back to SuperSlab (helper for shrink)
|
|
void drain_excess_blocks(int class_idx, int count);
|
|
|
|
// Adapt TLS cache size based on usage patterns
|
|
void adapt_tls_cache_size(int class_idx);
|
|
|
|
// Update high-water mark (called on every refill)
|
|
static inline void update_high_water_mark(int class_idx) {
|
|
if (!g_adaptive_sizing_enabled) return;
|
|
|
|
TLSCacheStats* stats = &g_tls_cache_stats[class_idx];
|
|
uint32_t current_count = g_tls_sll[class_idx].count;
|
|
|
|
if (current_count > stats->high_water_mark) {
|
|
stats->high_water_mark = current_count;
|
|
}
|
|
}
|
|
|
|
// Track refill for adaptive sizing (called after refill)
|
|
static inline void track_refill_for_adaptation(int class_idx) {
|
|
if (!g_adaptive_sizing_enabled) return;
|
|
|
|
TLSCacheStats* stats = &g_tls_cache_stats[class_idx];
|
|
stats->refill_count++;
|
|
|
|
// Update high-water mark
|
|
update_high_water_mark(class_idx);
|
|
|
|
// Periodically adapt cache size
|
|
adapt_tls_cache_size(class_idx);
|
|
}
|
|
|
|
// Get available capacity (for refill count clamping)
|
|
static inline int get_available_capacity(int class_idx) {
|
|
if (!g_adaptive_sizing_enabled) {
|
|
return 256; // Default fixed capacity
|
|
}
|
|
|
|
TLSCacheStats* stats = &g_tls_cache_stats[class_idx];
|
|
int current_count = (int)g_tls_sll[class_idx].count;
|
|
int available = (int)stats->capacity - current_count;
|
|
|
|
return (available > 0) ? available : 0;
|
|
}
|
|
|
|
// ========== Debugging & Stats ==========
|
|
|
|
// Print adaptive sizing stats for a class
|
|
static inline void print_adaptive_stats(int class_idx) {
|
|
TLSCacheStats* stats = &g_tls_cache_stats[class_idx];
|
|
|
|
fprintf(stderr, "[ADAPTIVE] Class %d: capacity=%zu, hwm=%zu, grows=%zu, shrinks=%zu, refills=%zu\n",
|
|
class_idx, stats->capacity, stats->high_water_mark,
|
|
stats->grow_count, stats->shrink_count, stats->refill_count);
|
|
}
|
|
|
|
// Print all adaptive sizing stats
|
|
static inline void print_all_adaptive_stats(void) {
|
|
if (!g_adaptive_sizing_enabled) {
|
|
fprintf(stderr, "[ADAPTIVE] Adaptive sizing disabled\n");
|
|
return;
|
|
}
|
|
|
|
fprintf(stderr, "\n========== Adaptive TLS Cache Stats ==========\n");
|
|
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
|
print_adaptive_stats(i);
|
|
}
|
|
fprintf(stderr, "==============================================\n\n");
|
|
}
|