Files
hakmem/core/tiny_adaptive_sizing.h
Moe Charm (CI) 030132f911 Phase 10: TLS/SFC aggressive cache tuning (syscall reduction failed)
Goal: Reduce backend transitions by increasing frontend hit rate
Result: +2% best case, syscalls unchanged (root cause: SuperSlab churn)

Implementation:

1. Cache capacity expansion (2-8x per-class)
   - Hot classes (C0-C3): 4x increase (512 slots)
   - Medium classes (C4-C6): 2-3x increase
   - Class 7 (1KB): 2x increase (128 slots)
   - Fast cache: 2x default capacity

2. Refill batch size increase (4-8x)
   - Global default: 16 → 64 (4x)
   - Hot classes: 128 (8x) via HAKMEM_TINY_REFILL_COUNT_HOT
   - Mid classes: 96 (6x) via HAKMEM_TINY_REFILL_COUNT_MID
   - Class 7: 64 → 128 (2x)
   - SFC refill: 64 → 128 (2x)

3. Adaptive sizing aggressive parameters
   - Grow threshold: 80% → 70% (expand earlier)
   - Shrink threshold: 20% → 10% (shrink less)
   - Growth rate: 2x → 1.5x (smoother growth)
   - Max capacity: 2048 → 4096 (2x ceiling)
   - Adapt frequency: Every 10 → 5 refills (more responsive)

Performance Results (100K iterations):

Before (Phase 9):
- Performance: 9.71M ops/s
- Syscalls: 1,729 (mmap:877, munmap:852)

After (Phase 10):
- Default settings: 8.77M ops/s (-9.7%) ⚠️
- Optimal ENV: 9.89M ops/s (+2%) 
- Syscalls: 1,729 (unchanged) 

Optimal ENV configuration:
export HAKMEM_TINY_REFILL_COUNT_HOT=256
export HAKMEM_TINY_REFILL_COUNT_MID=192

Root Cause Analysis:

Bottleneck is NOT TLS/SFC hit rate, but SuperSlab allocation churn:
- 877 SuperSlabs allocated (877MB via mmap)
- Phase 9 LRU cache not utilized (no frees during benchmark)
- All SuperSlabs retained until program exit
- System malloc: 9 syscalls vs HAKMEM: 1,729 syscalls (192x gap)

Conclusion:

TLS/SFC tuning cannot solve the SuperSlab allocation-policy problem.
Next step: a Phase 11 SuperSlab prewarm strategy to eliminate
mmap/munmap calls during benchmark execution.

ChatGPT review: Strategy validated, Option A (Prewarm) recommended.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 14:25:54 +09:00

141 lines
4.8 KiB
C

// tiny_adaptive_sizing.h - Phase 2b: TLS Cache Adaptive Sizing
// Purpose: Hot classes get more cache → Better hit rate → Higher throughput
// Design: Track high-water mark, adapt capacity based on usage ratio
// Expected: +3-10% performance, -30-50% TLS cache memory overhead
#pragma once
#include "hakmem_tiny.h"
#include <stdint.h>
#include <time.h>
#include <stdio.h>
// ========== Configuration ==========
// Capacity bounds for the per-class TLS cache
// Phase 10: Aggressive adaptive sizing - maximize front cache utilization
#define TLS_CACHE_MIN_CAPACITY 32 // Lower bound on cache capacity (2x increase)
#define TLS_CACHE_MAX_CAPACITY 4096 // Upper bound on cache capacity (2x increase)
#define TLS_CACHE_INITIAL_CAPACITY 256 // Starting capacity (4x increase from 64)
// Adaptation triggers
// Phase 10: More frequent adaptation to respond quickly to workload changes
#define ADAPT_REFILL_THRESHOLD 5 // Adapt every 5 refills (was 10)
#define ADAPT_TIME_THRESHOLD_NS (500000000ULL) // Or every 0.5 seconds, expressed in nanoseconds (was 1s)
// Growth/shrink thresholds, written as fractions of the current capacity
// Phase 10: Aggressive growth, conservative shrinkage
// These are floating-point constants, so they cannot be evaluated in #if.
#define GROW_THRESHOLD 0.7 // Grow if usage > 70% of capacity (was 80%)
#define SHRINK_THRESHOLD 0.1 // Shrink if usage < 10% of capacity (was 20%)
// ========== Data Structures ==========
// Adaptive-sizing bookkeeping kept for one tiny size class.
typedef struct TLSCacheStats {
    // Current capacity; get_available_capacity() subtracts the live list
    // count from this value.
    size_t capacity;

    // Peak usage in the recent window; update_high_water_mark() raises it
    // whenever the live list count exceeds it.
    size_t high_water_mark;

    // Refills since the last adaptation; incremented by
    // track_refill_for_adaptation().
    size_t refill_count;

    // Number of shrinks (debugging; printed by print_adaptive_stats()).
    size_t shrink_count;

    // Number of grows (debugging; printed by print_adaptive_stats()).
    size_t grow_count;

    // Timestamp of the last adaptation.
    // Presumably a get_timestamp_ns() value - TODO confirm in the implementation.
    uint64_t last_adapt_time;
} TLSCacheStats;
// TLS per-thread stats (defined in hakmem_tiny.c), one entry per tiny size class
extern __thread TLSCacheStats g_tls_cache_stats[TINY_NUM_CLASSES];
// TLS cache variables (defined in hakmem_tiny.c)
// g_tls_sll_head is declared here but not referenced by the inline helpers in this header.
// g_tls_sll_count is read by update_high_water_mark() and get_available_capacity().
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
// Global enable flag (runtime toggle via HAKMEM_ADAPTIVE_SIZING=1)
// When zero, update_high_water_mark() and track_refill_for_adaptation() return
// immediately and get_available_capacity() reports a fixed 256.
extern int g_adaptive_sizing_enabled;
// ========== Helper Functions ==========
// Read CLOCK_MONOTONIC and return it as a single nanosecond count.
// Returns 0 if clock_gettime() reports failure, so the result is never built
// from an uninitialized timespec.
static inline uint64_t get_timestamp_ns(void) {
    struct timespec ts;
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0;
    }
    // Widen before multiplying so the seconds-to-nanoseconds product is
    // computed in 64 bits.
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}
// ========== Core API ==========
// The functions in this section are only declared here; their definitions are
// not part of this header.
// Initialize adaptive sizing stats (called from hak_tiny_init)
void adaptive_sizing_init(void);
// Grow the TLS cache capacity of size class class_idx.
// NOTE(review): previously documented as a 2x step; Phase 10 tuning reportedly
// uses 1.5x growth - confirm against the implementation.
void grow_tls_cache(int class_idx);
// Shrink the TLS cache capacity of size class class_idx.
// NOTE(review): documented as a 0.5x step - confirm against the implementation.
void shrink_tls_cache(int class_idx);
// Drain excess blocks back to SuperSlab (helper for shrink)
// count is presumably the number of blocks to release - TODO confirm.
void drain_excess_blocks(int class_idx, int count);
// Adapt TLS cache size based on usage patterns.
// Invoked by track_refill_for_adaptation() on every tracked refill.
void adapt_tls_cache_size(int class_idx);
// Raise the recorded high-water mark of class_idx to the current TLS list
// count when that count is larger. Does nothing while adaptive sizing is
// disabled. Invoked from track_refill_for_adaptation().
static inline void update_high_water_mark(int class_idx) {
    if (g_adaptive_sizing_enabled) {
        TLSCacheStats *entry = &g_tls_cache_stats[class_idx];
        const uint32_t live = g_tls_sll_count[class_idx];

        if (entry->high_water_mark < live) {
            entry->high_water_mark = live;
        }
    }
}
// Record one refill for class_idx, refresh its high-water mark, and then hand
// control to adapt_tls_cache_size(). Does nothing while adaptive sizing is
// disabled.
static inline void track_refill_for_adaptation(int class_idx) {
    if (g_adaptive_sizing_enabled) {
        g_tls_cache_stats[class_idx].refill_count += 1;

        // The mark is refreshed before adapting so the sizing step sees the
        // most recent peak.
        update_high_water_mark(class_idx);
        adapt_tls_cache_size(class_idx);
    }
}
// Report how many more blocks the TLS cache of class_idx can take (used to
// clamp the refill count).
// Returns capacity minus the current list count, floored at 0; returns a
// fixed 256 while adaptive sizing is disabled.
static inline int get_available_capacity(int class_idx) {
    if (g_adaptive_sizing_enabled) {
        const TLSCacheStats *entry = &g_tls_cache_stats[class_idx];
        int room = (int)entry->capacity - (int)g_tls_sll_count[class_idx];

        // The list may currently hold more than the capacity allows; report
        // no room rather than a negative amount.
        if (room < 0) {
            room = 0;
        }
        return room;
    }

    return 256; // Default fixed capacity
}
// ========== Debugging & Stats ==========
// Write one line to stderr with the capacity, high-water mark, grow/shrink
// counters and refill counter of class_idx.
static inline void print_adaptive_stats(int class_idx) {
    const TLSCacheStats *entry = &g_tls_cache_stats[class_idx];

    fprintf(stderr,
            "[ADAPTIVE] Class %d: capacity=%zu, hwm=%zu, grows=%zu, shrinks=%zu, refills=%zu\n",
            class_idx,
            entry->capacity,
            entry->high_water_mark,
            entry->grow_count,
            entry->shrink_count,
            entry->refill_count);
}
// Write the stats line of every size class to stderr, framed by a banner.
// While adaptive sizing is disabled, only a single notice line is written.
static inline void print_all_adaptive_stats(void) {
    if (g_adaptive_sizing_enabled) {
        fputs("\n========== Adaptive TLS Cache Stats ==========\n", stderr);

        int cls = 0;
        while (cls < TINY_NUM_CLASSES) {
            print_adaptive_stats(cls);
            cls++;
        }

        fputs("==============================================\n\n", stderr);
    } else {
        fputs("[ADAPTIVE] Adaptive sizing disabled\n", stderr);
    }
}