Files
hakmem/core/tiny_adaptive_sizing.h
Moe Charm (CI) 707056b765 feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓

Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inlining (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
  Result: +180-280% improvement, 85-146% of System malloc

Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)

Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
  Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
  Result: 50% → 95% stability (19/20 4T success)

Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
  Files: core/tiny_adaptive_sizing.c/h (new)

Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → ... → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
  Files: core/hakmem_bigcache.c/h
  Expected: +10-20% cache hit rate

Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)

Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis

Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files

Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (integration is estimated to need 2-3 days)

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00

138 lines
4.6 KiB
C

// tiny_adaptive_sizing.h - Phase 2b: TLS Cache Adaptive Sizing
// Purpose: Hot classes get more cache → Better hit rate → Higher throughput
// Design: Track high-water mark, adapt capacity based on usage ratio
// Expected: +3-10% performance, -30-50% TLS cache memory overhead
#pragma once
#include "hakmem_tiny.h"
#include <stdint.h>
#include <time.h>
#include <stdio.h>
// ========== Configuration ==========
// Capacity bounds for one size class's TLS cache. Capacities are counted in
// the same units as g_tls_sll_count (cached entries).
#define TLS_CACHE_MIN_CAPACITY 16 // Minimum cache size
#define TLS_CACHE_MAX_CAPACITY 2048 // Maximum cache size
#define TLS_CACHE_INITIAL_CAPACITY 64 // Initial size (reduced from 256)
// Adaptation triggers. The code that evaluates them is not in this header;
// presumably it lives in adapt_tls_cache_size() -- confirm in the .c file.
#define ADAPT_REFILL_THRESHOLD 10 // Adapt every 10 refills
#define ADAPT_TIME_THRESHOLD_NS (1000000000ULL) // Or every 1 second (value is in nanoseconds)
// Growth/shrink thresholds, written as fractions of capacity (double literals).
// NOTE(review): "usage" is presumably high_water_mark / capacity -- confirm
// against the implementation.
#define GROW_THRESHOLD 0.8 // Grow if usage > 80% of capacity
#define SHRINK_THRESHOLD 0.2 // Shrink if usage < 20% of capacity
// ========== Data Structures ==========
// Per-class TLS cache statistics used by adaptive sizing.
// Instances live in the thread-local array g_tls_cache_stats, one per size class.
typedef struct TLSCacheStats {
// get_available_capacity() reports capacity minus the current cached count.
size_t capacity; // Current capacity
// Raised by update_high_water_mark() whenever g_tls_sll_count exceeds it.
size_t high_water_mark; // Peak usage in recent window
// Incremented by track_refill_for_adaptation(); the reset happens outside this header.
size_t refill_count; // Refills since last adapt
size_t shrink_count; // Shrinks (for debugging)
size_t grow_count; // Grows (for debugging)
// NOTE(review): presumably a get_timestamp_ns() value in nanoseconds -- confirm.
uint64_t last_adapt_time; // Timestamp of last adaptation
} TLSCacheStats;
// Per-thread adaptive-sizing stats, one entry per tiny size class
// (defined in hakmem_tiny.c).
extern __thread TLSCacheStats g_tls_cache_stats[TINY_NUM_CLASSES];
// Per-thread TLS cache variables, indexed by size class (defined in hakmem_tiny.c).
// NOTE(review): g_tls_sll_head is presumably the head of each class's singly
// linked free list -- it is not dereferenced in this header; confirm.
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
// Current entry count per class; the helpers in this header treat it as the
// cache's current usage and compare it against TLSCacheStats.capacity.
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
// Process-wide (not thread-local) enable flag; runtime toggle via
// HAKMEM_ADAPTIVE_SIZING=1. While it is zero, the tracking helpers in this
// header return without touching the stats.
extern int g_adaptive_sizing_enabled;
// ========== Helper Functions ==========
// Return the current CLOCK_MONOTONIC reading in nanoseconds.
//
// Returns 0 if clock_gettime() fails, so the result is never computed from an
// uninitialized timespec. Because the monotonic clock is used, the difference
// between two results is suitable for measuring elapsed time.
static inline uint64_t get_timestamp_ns(void) {
    struct timespec ts = {0};

    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0; // clock unavailable: report a deterministic value
    }
    // Widen before multiplying so the seconds term cannot overflow time_t.
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}
// ========== Core API ==========
// Initialize adaptive sizing stats (called from hak_tiny_init).
// Implemented outside this header.
void adaptive_sizing_init(void);
// Grow the TLS cache capacity of size class `class_idx` (2x).
// NOTE(review): presumably clamped to TLS_CACHE_MAX_CAPACITY -- confirm in the implementation.
void grow_tls_cache(int class_idx);
// Shrink the TLS cache capacity of size class `class_idx` (0.5x).
// NOTE(review): presumably clamped to TLS_CACHE_MIN_CAPACITY -- confirm in the implementation.
void shrink_tls_cache(int class_idx);
// Drain excess blocks of size class `class_idx` back to SuperSlab (helper for shrink).
// NOTE(review): `count` is presumably the number of blocks to release -- confirm.
void drain_excess_blocks(int class_idx, int count);
// Adapt the TLS cache size of size class `class_idx` based on usage patterns.
// track_refill_for_adaptation() calls this on every tracked refill, so any
// rate limiting (ADAPT_REFILL_THRESHOLD / ADAPT_TIME_THRESHOLD_NS) would have
// to happen inside this function -- TODO confirm.
void adapt_tls_cache_size(int class_idx);
// Raise the recorded peak for size class `class_idx` to the current cached
// count when the count has exceeded it. Does nothing while adaptive sizing is
// disabled. Meant to be called on every refill.
static inline void update_high_water_mark(int class_idx) {
    if (g_adaptive_sizing_enabled) {
        const uint32_t in_cache = g_tls_sll_count[class_idx];
        size_t *peak = &g_tls_cache_stats[class_idx].high_water_mark;

        if (*peak < in_cache) {
            *peak = in_cache;
        }
    }
}
// Bookkeeping hook to run after a refill of size class `class_idx`: counts
// the refill, refreshes the high-water mark, then hands control to
// adapt_tls_cache_size(). A no-op while adaptive sizing is disabled.
static inline void track_refill_for_adaptation(int class_idx) {
    if (g_adaptive_sizing_enabled) {
        g_tls_cache_stats[class_idx].refill_count += 1;

        // Record the peak before the adaptation step looks at the stats.
        update_high_water_mark(class_idx);

        // Invoked on every tracked refill.
        adapt_tls_cache_size(class_idx);
    }
}
// Capacity reported while adaptive sizing is disabled. Overridable at build
// time; the default matches the previously hard-coded value.
#ifndef TLS_CACHE_FIXED_CAPACITY
#define TLS_CACHE_FIXED_CAPACITY 256
#endif
// Get available capacity for size class `class_idx` (for refill count clamping).
//
// With adaptive sizing disabled, returns TLS_CACHE_FIXED_CAPACITY regardless
// of how many entries are currently cached. Otherwise returns the class's
// current capacity minus its current cached count, or 0 when the cache is
// already at or above capacity.
static inline int get_available_capacity(int class_idx) {
    if (!g_adaptive_sizing_enabled) {
        return TLS_CACHE_FIXED_CAPACITY;
    }

    const size_t capacity = g_tls_cache_stats[class_idx].capacity;
    const size_t cached = g_tls_sll_count[class_idx];

    // Compare in size_t before narrowing so an out-of-range count cannot wrap
    // into a bogus positive result.
    if (cached >= capacity) {
        return 0;
    }
    return (int)(capacity - cached);
}
// ========== Debugging & Stats ==========
// Write a one-line summary of the adaptive-sizing counters for size class
// `class_idx` to stderr. Prints unconditionally; the enable flag is not
// consulted here.
static inline void print_adaptive_stats(int class_idx) {
    const TLSCacheStats *entry = &g_tls_cache_stats[class_idx];
    const size_t cap = entry->capacity;
    const size_t peak = entry->high_water_mark;
    const size_t grows = entry->grow_count;
    const size_t shrinks = entry->shrink_count;
    const size_t refills = entry->refill_count;

    fprintf(stderr,
            "[ADAPTIVE] Class %d: capacity=%zu, hwm=%zu, grows=%zu, shrinks=%zu, refills=%zu\n",
            class_idx, cap, peak, grows, shrinks, refills);
}
// Dump the adaptive-sizing counters of every size class to stderr, framed by
// banner lines. When adaptive sizing is disabled, prints a single notice
// instead.
static inline void print_all_adaptive_stats(void) {
    if (g_adaptive_sizing_enabled) {
        fprintf(stderr, "\n========== Adaptive TLS Cache Stats ==========\n");

        int cls = 0;
        while (cls < TINY_NUM_CLASSES) {
            print_adaptive_stats(cls);
            ++cls;
        }

        fprintf(stderr, "==============================================\n\n");
    } else {
        fprintf(stderr, "[ADAPTIVE] Adaptive sizing disabled\n");
    }
}