Files
hakmem/core/tiny_adaptive_sizing.c
Moe Charm (CI) 9b0d746407 Phase 3d-B: TLS Cache Merge - Unified g_tls_sll[] structure (+12-18% expected)
Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified
TinyTLSSLL struct to improve L1D cache locality. Expected performance gain:
+12-18% from reducing cache line splits (2 loads → 1 load per operation).

Changes:
- core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad)
- core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8]
- core/box/tls_sll_box.h: Update Box API (13 sites) for unified access
- Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head
- Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count
- core/hakmem_tiny_integrity.h: Unified canary guards
- core/box/integrity_box.c: Simplified canary validation
- Makefile: Added core/box/tiny_sizeclass_hist_box.o to link

Build:  PASS (10K ops sanity test)
Warnings: Only pre-existing LTO type mismatches (unrelated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 07:32:30 +09:00

180 lines
5.9 KiB
C

// tiny_adaptive_sizing.c - Phase 2b: TLS Cache Adaptive Sizing Implementation
// Purpose: Hot classes get more cache → Better hit rate → Higher throughput
#include "tiny_adaptive_sizing.h"
#include "hakmem_tiny.h"
#include "box/tiny_next_ptr_box.h" // Phase E1-CORRECT: Box API
#include <stdio.h>
#include <stdlib.h>
// Per-thread adaptive-sizing statistics, one entry per tiny size class
// (indexed by class_idx throughout this file).
__thread TLSCacheStats g_tls_cache_stats[TINY_NUM_CLASSES];
// Global enable flag (default: enabled). adaptive_sizing_init() clears it
// when HAKMEM_ADAPTIVE_SIZING parses to 0.
int g_adaptive_sizing_enabled = 1;
// Logging enable flag (default: disabled). Controlled by the
// HAKMEM_ADAPTIVE_LOG environment variable in adaptive_sizing_init().
static int g_adaptive_logging_enabled = 0;
// External helpers declared for the block-draining path.
// NOTE(review): neither function appears to be called in the code visible in
// this file; drain_excess_blocks() only mentions the first in a comment.
extern void tiny_superslab_return_block(void* ptr, int class_idx);
extern int hak_tiny_size_to_class(size_t size);
// ========== Initialization ==========
/*
 * adaptive_sizing_init - read the adaptive-sizing environment switches and
 * reset the calling thread's per-class cache statistics.
 *
 * Environment variables:
 *   HAKMEM_ADAPTIVE_SIZING  If set and it parses to 0, adaptive sizing is
 *                           disabled and the function returns immediately
 *                           (logging flag and stats are left untouched).
 *   HAKMEM_ADAPTIVE_LOG     If set, a non-zero value enables logging and a
 *                           zero value disables it. If unset, the current
 *                           setting (default: disabled) is kept.
 *
 * g_tls_cache_stats is declared __thread, so only the entries belonging to
 * the thread that calls this function are initialized.
 */
void adaptive_sizing_init(void) {
    const char* env = getenv("HAKMEM_ADAPTIVE_SIZING");
    if (env && atoi(env) == 0) {
        g_adaptive_sizing_enabled = 0;
        fprintf(stderr, "[ADAPTIVE] Adaptive sizing disabled via env\n");
        return;
    }

    // Logging defaults to off, so the variable must be able to turn it ON.
    // (Previously only a value of 0 was acted upon, which made it impossible
    // to enable logging at all.)
    const char* log_env = getenv("HAKMEM_ADAPTIVE_LOG");
    if (log_env) {
        g_adaptive_logging_enabled = (atoi(log_env) != 0) ? 1 : 0;
    }

    // Every class starts from the same initial capacity with cleared counters.
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        TLSCacheStats* stats = &g_tls_cache_stats[class_idx];
        stats->capacity = TLS_CACHE_INITIAL_CAPACITY;
        stats->high_water_mark = 0;
        stats->refill_count = 0;
        stats->shrink_count = 0;
        stats->grow_count = 0;
        stats->last_adapt_time = get_timestamp_ns();
    }

    if (g_adaptive_logging_enabled) {
        fprintf(stderr, "[ADAPTIVE] Adaptive sizing initialized (initial_cap=%d, min=%d, max=%d)\n",
                TLS_CACHE_INITIAL_CAPACITY, TLS_CACHE_MIN_CAPACITY, TLS_CACHE_MAX_CAPACITY);
    }
}
// ========== Grow/Shrink Functions ==========
/*
 * grow_tls_cache - enlarge the TLS cache capacity of one size class.
 *
 * The new capacity is the current capacity plus half of it, clamped to
 * TLS_CACHE_MAX_CAPACITY. When that yields the same value as the current
 * capacity the function returns without touching any counters.
 *
 * class_idx: index into g_tls_cache_stats; it is not range-checked here.
 */
void grow_tls_cache(int class_idx) {
    TLSCacheStats* st = &g_tls_cache_stats[class_idx];
    const size_t before = st->capacity;

    // Grow by 50% of the current size, then clamp to the upper bound.
    size_t after = st->capacity + (st->capacity / 2);
    if (after > TLS_CACHE_MAX_CAPACITY) {
        after = TLS_CACHE_MAX_CAPACITY;
    }

    // Nothing changes (e.g. the cache is already at the maximum).
    if (after == before) {
        return;
    }

    st->capacity = after;
    st->grow_count++;

    if (!g_adaptive_logging_enabled) {
        return;
    }
    fprintf(stderr, "[TLS_CACHE] Grow class %d: %zu → %zu slots (+50%%, grow_count=%zu)\n",
            class_idx, before, st->capacity, st->grow_count);
}
/*
 * drain_excess_blocks - pop up to `count` blocks from the front of the
 * calling thread's singly linked free list for `class_idx`.
 *
 * class_idx: index into g_tls_sll; not range-checked here.
 * count:     maximum number of blocks to pop; a value <= 0 pops nothing.
 *
 * For each popped block the list's counter is decremented unless it is
 * already zero. Popping stops early when the list becomes empty.
 *
 * NOTE: popped blocks are not handed to any release routine here; they are
 * simply unlinked and dropped.
 * TODO: Integrate with proper SuperSlab return path.
 */
void drain_excess_blocks(int class_idx, int count) {
    void** list_head = &g_tls_sll[class_idx].head;
    int popped;

    for (popped = 0; *list_head != NULL && popped < count; popped++) {
        void* victim = *list_head;

        // Unlink: advance the head to the successor of the first block.
        *list_head = tiny_next_read(class_idx, victim);

        // Keep the counter in step with the list, never going below zero.
        if (g_tls_sll[class_idx].count > 0) {
            g_tls_sll[class_idx].count--;
        }
    }

    if (popped > 0 && g_adaptive_logging_enabled) {
        fprintf(stderr, "[TLS_CACHE] Drained %d excess blocks from class %d\n", popped, class_idx);
    }
}
/*
 * shrink_tls_cache - reduce the TLS cache capacity of one size class.
 *
 * The new capacity is half the current capacity, clamped from below to
 * TLS_CACHE_MIN_CAPACITY. When that yields the same value as the current
 * capacity the function returns without touching anything.
 *
 * If the thread's free list for this class currently holds more blocks than
 * the new capacity, the surplus is removed via drain_excess_blocks() before
 * the capacity is lowered.
 *
 * class_idx: index into g_tls_cache_stats / g_tls_sll; not range-checked.
 */
void shrink_tls_cache(int class_idx) {
    TLSCacheStats* st = &g_tls_cache_stats[class_idx];
    const size_t before = st->capacity;

    size_t target = st->capacity / 2;
    if (target < TLS_CACHE_MIN_CAPACITY) {
        target = TLS_CACHE_MIN_CAPACITY;
    }

    // Nothing changes (e.g. the cache is already at the minimum).
    if (target == before) {
        return;
    }

    // Trim the list down to the new limit first.
    if (g_tls_sll[class_idx].count > target) {
        int surplus = (int)(g_tls_sll[class_idx].count - target);
        drain_excess_blocks(class_idx, surplus);
    }

    st->capacity = target;
    st->shrink_count++;

    if (!g_adaptive_logging_enabled) {
        return;
    }
    fprintf(stderr, "[TLS_CACHE] Shrink class %d: %zu → %zu slots (shrink_count=%zu)\n",
            class_idx, before, st->capacity, st->shrink_count);
}
// ========== Adaptation Logic ==========
/*
 * adapt_tls_cache_size - periodically resize one class's TLS cache based on
 * how much of its capacity was used during the last observation window.
 *
 * The function is a no-op when adaptive sizing is disabled, or when neither
 * trigger has fired yet: refill_count reaching ADAPT_REFILL_THRESHOLD, or the
 * time since last_adapt_time reaching ADAPT_TIME_THRESHOLD_NS.
 *
 * A zero capacity is repaired to TLS_CACHE_INITIAL_CAPACITY and the function
 * returns without adapting or resetting the window.
 *
 * Otherwise usage = high_water_mark / capacity decides the action:
 *   usage > GROW_THRESHOLD    -> grow_tls_cache()
 *   usage < SHRINK_THRESHOLD  -> shrink_tls_cache()
 *   anything else             -> capacity unchanged (optionally logged)
 * Afterwards the window is restarted: high_water_mark is seeded with the
 * current list count, refill_count is cleared and last_adapt_time is updated.
 *
 * class_idx: index into g_tls_cache_stats / g_tls_sll; not range-checked.
 */
void adapt_tls_cache_size(int class_idx) {
    if (!g_adaptive_sizing_enabled) {
        return;
    }

    TLSCacheStats* st = &g_tls_cache_stats[class_idx];
    uint64_t now = get_timestamp_ns();

    // Too soon: neither the refill trigger nor the time trigger has fired.
    if (st->refill_count < ADAPT_REFILL_THRESHOLD &&
        (now - st->last_adapt_time) < ADAPT_TIME_THRESHOLD_NS) {
        return;
    }

    // Guard the division below against a zero capacity.
    if (st->capacity == 0) {
        st->capacity = TLS_CACHE_INITIAL_CAPACITY;
        return;
    }

    const double ratio = (double)st->high_water_mark / (double)st->capacity;

    if (ratio > GROW_THRESHOLD) {
        grow_tls_cache(class_idx);
    } else if (ratio < SHRINK_THRESHOLD) {
        shrink_tls_cache(class_idx);
    } else if (g_adaptive_logging_enabled) {
        // Usage is between the two thresholds: leave the capacity alone.
        fprintf(stderr, "[TLS_CACHE] Keep class %d at %zu slots (usage=%.1f%%)\n",
                class_idx, st->capacity, ratio * 100.0);
    }

    // Start the next observation window.
    st->high_water_mark = g_tls_sll[class_idx].count;
    st->refill_count = 0;
    st->last_adapt_time = now;
}