ChatGPT's diagnostic changes to address TLS_SLL_HDR_RESET issue. Current status: Partial mitigation, but root cause remains. Changes Applied: 1. SuperSlab Registry Fallback (hakmem_super_registry.h) - Added legacy table probe when hash map lookup misses - Prevents NULL returns for valid SuperSlabs during initialization - Status: ✅ Works but may hide underlying registration issues 2. TLS SLL Push Validation (tls_sll_box.h) - Reject push if SuperSlab lookup returns NULL - Reject push if class_idx mismatch detected - Added [TLS_SLL_PUSH_NO_SS] diagnostic message - Status: ✅ Prevents list corruption (defensive) 3. SuperSlab Allocation Class Fix (superslab_allocate.c) - Pass actual class_idx to sp_internal_allocate_superslab - Prevents dummy class=8 causing OOB access - Status: ✅ Root cause fix for allocation path 4. Debug Output Additions - First 256 push/pop operations traced - First 4 mismatches logged with details - SuperSlab registration state logged - Status: ✅ Diagnostic tool (not a fix) 5. TLS Hint Box Removed - Deleted ss_tls_hint_box.{c,h} (Phase 1 optimization) - Simplified to focus on stability first - Status: ⏳ Can be re-added after root cause fixed Current Problem (REMAINS UNSOLVED): - [TLS_SLL_HDR_RESET] still occurs after ~60 seconds of sh8bench - Pointer is 16 bytes offset from expected (class 1 → class 2 boundary) - hak_super_lookup returns NULL for that pointer - Suggests: Use-After-Free, Double-Free, or pointer arithmetic error Root Cause Analysis: - Pattern: Pointer offset by +16 (one class 1 stride) - Timing: Cumulative problem (appears after 60s, not immediately) - Location: Header corruption detected during TLS SLL pop Remaining Issues: ⚠️ Registry fallback is defensive (may hide registration bugs) ⚠️ Push validation prevents symptoms but not root cause ⚠️ 16-byte pointer offset source unidentified Next Steps for Investigation: 1. Full pointer arithmetic audit (Magazine ⇔ TLS SLL paths) 2. 
Enhanced logging at HDR_RESET point: - Expected vs actual pointer value - Pointer provenance (where it came from) - Allocation trace for that block 3. Verify Headerless flag is OFF throughout build 4. Check for double-offset application in conversions Technical Assessment: - 60% root cause fixes (allocation class, validation) - 40% defensive mitigation (registry fallback, push rejection) Performance Impact: - Registry fallback: +10-30 cycles on cold path (negligible) - Push validation: +5-10 cycles per push (acceptable) - Overall: < 2% performance impact estimated Related Issues: - Phase 1 TLS Hint Box removed temporarily - Phase 2 Headerless blocked until stability achieved 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
226 lines
8.0 KiB
C
226 lines
8.0 KiB
C
// ss_tls_hint_box.h - Phase 9-1-4: TLS Hints for SuperSlab Lookup
|
|
// Purpose: Cache last-used SuperSlab per class to eliminate hash table lookups
|
|
//
|
|
// Box Pattern:
|
|
// - Single Responsibility: TLS caching layer for SuperSlab lookups
|
|
// - Clear Contract: O(1) hint check, fallback to hash table on miss
|
|
// - Observable: Debug macros log hit/miss rates
|
|
// - Composable: Wraps ss_addr_map_box for fallback
|
|
//
|
|
// Performance Target:
|
|
// - Hit case: 5-10 cycles (TLS load + range check)
|
|
// - Miss case: 15-25 cycles (TLS update + hash table lookup)
|
|
// - Expected hit rate: 80-95% (locality of reference)
|
|
// - Net improvement: 50-80 cycles → 10-15 cycles average
|
|
//
|
|
// Design:
|
|
// - __thread SuperSlab* g_tls_ss_hint[TINY_NUM_CLASSES]
|
|
// - Each allocation/free updates hint for its size class
|
|
// - Quick range check: ptr >= base && ptr < base + size
|
|
// - Fallback to hash table on miss, update hint
|
|
|
|
#ifndef HAK_BOX_SS_TLS_HINT_H
|
|
#define HAK_BOX_SS_TLS_HINT_H
|
|
|
|
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>  // getenv() for the HAKMEM_SS_TLS_HINT_TRACE debug switch

#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_superslab.h"
#include "../hakmem_tiny_superslab_constants.h"
#include "ss_addr_map_box.h"
|
|
|
|
// Forward declaration
|
|
struct SuperSlab;
|
|
|
|
// ============================================================================
|
|
// TLS Hint Cache
|
|
// ============================================================================
|
|
|
|
// TLS cache: Most recently used SuperSlab per size class
|
|
// - Reduces hash table lookups by 80-95% (locality of reference)
|
|
// - Each thread maintains its own cache (no contention)
|
|
// - Not invalidated automatically yet: call ss_tls_hint_invalidate() when a SuperSlab is freed (automatic invalidation is planned for Phase 9-2)
|
|
#ifndef TINY_NUM_CLASSES
|
|
#define TINY_NUM_CLASSES 8 // Fallback if hakmem_tiny.h not included
|
|
#endif
|
|
|
|
extern __thread struct SuperSlab* g_tls_ss_hint[TINY_NUM_CLASSES];
|
|
|
|
// ============================================================================
|
|
// Statistics (Debug builds only)
|
|
// ============================================================================
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
// Per-thread counters describing how ss_tls_hint_lookup() resolved its calls.
typedef struct {
    // Number of times the lookup function was entered.
    uint64_t total_lookups;
    // Lookups answered directly from the TLS hint.
    uint64_t hint_hits;
    // Lookups where the TLS hint did not match (hash table consulted).
    uint64_t hint_misses;
    // Hash table lookups that found a SuperSlab.
    uint64_t hash_hits;
    // Hash table lookups that returned NULL.
    uint64_t hash_misses;
} SSTLSHintStats;
|
|
|
|
extern __thread SSTLSHintStats g_tls_hint_stats;
|
|
|
|
// Print statistics (for profiling)
|
|
static inline void ss_tls_hint_print_stats(void) {
|
|
fprintf(stderr, "\n[SS_TLS_HINT_STATS] Thread-local SuperSlab Lookup Statistics:\n");
|
|
fprintf(stderr, " Total lookups: %lu\n", g_tls_hint_stats.total_lookups);
|
|
fprintf(stderr, " TLS hint hits: %lu (%.1f%%)\n",
|
|
g_tls_hint_stats.hint_hits,
|
|
100.0 * g_tls_hint_stats.hint_hits / (g_tls_hint_stats.total_lookups + 1));
|
|
fprintf(stderr, " TLS hint misses: %lu (%.1f%%)\n",
|
|
g_tls_hint_stats.hint_misses,
|
|
100.0 * g_tls_hint_stats.hint_misses / (g_tls_hint_stats.total_lookups + 1));
|
|
fprintf(stderr, " Hash table hits: %lu\n", g_tls_hint_stats.hash_hits);
|
|
fprintf(stderr, " Hash table misses: %lu\n", g_tls_hint_stats.hash_misses);
|
|
|
|
uint64_t total_misses = g_tls_hint_stats.hint_misses + g_tls_hint_stats.hash_misses;
|
|
fprintf(stderr, " Overall hit rate: %.1f%%\n",
|
|
100.0 * (g_tls_hint_stats.hint_hits + g_tls_hint_stats.hash_hits) /
|
|
(g_tls_hint_stats.total_lookups + 1));
|
|
}
|
|
#endif
|
|
|
|
// ============================================================================
|
|
// API Functions
|
|
// ============================================================================
|
|
|
|
// Initialize TLS hints (call once per thread)
|
|
static inline void ss_tls_hint_init(void) {
|
|
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
|
g_tls_ss_hint[i] = NULL;
|
|
}
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
g_tls_hint_stats.total_lookups = 0;
|
|
g_tls_hint_stats.hint_hits = 0;
|
|
g_tls_hint_stats.hint_misses = 0;
|
|
g_tls_hint_stats.hash_hits = 0;
|
|
g_tls_hint_stats.hash_misses = 0;
|
|
#endif
|
|
}
|
|
|
|
// Check if pointer is within SuperSlab range
|
|
// Fast inline range check: ptr >= base && ptr < base + size
|
|
static inline int ss_contains(struct SuperSlab* ss, void* ptr) {
|
|
if (!ss) return 0;
|
|
|
|
uintptr_t p = (uintptr_t)ptr;
|
|
uintptr_t base = (uintptr_t)ss;
|
|
uintptr_t size = (1UL << ss->lg_size);
|
|
|
|
return (p >= base) && (p < base + size);
|
|
}
|
|
|
|
// Lookup SuperSlab with TLS hint
|
|
// - class_idx: Size class index (0-7 for Tiny classes)
|
|
// - ptr: Pointer to look up
|
|
// Returns: SuperSlab* if found, NULL otherwise
|
|
//
|
|
// Contract: O(1) amortized lookup with TLS caching
|
|
// - Fast path: 5-10 cycles (TLS hint hit)
|
|
// - Slow path: 15-25 cycles (hash table lookup + hint update)
|
|
static inline struct SuperSlab* ss_tls_hint_lookup(int class_idx, void* ptr) {
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
g_tls_hint_stats.total_lookups++;
|
|
#endif
|
|
|
|
// Bounds check
|
|
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
|
return NULL;
|
|
}
|
|
|
|
// Fast path: Check TLS hint
|
|
struct SuperSlab* hint = g_tls_ss_hint[class_idx];
|
|
if (__builtin_expect(hint != NULL, 1)) {
|
|
if (__builtin_expect(ss_contains(hint, ptr), 1)) {
|
|
// TLS hint hit!
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
g_tls_hint_stats.hint_hits++;
|
|
|
|
static __thread int s_verbose = -1;
|
|
if (__builtin_expect(s_verbose == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_SS_TLS_HINT_TRACE");
|
|
s_verbose = (e && *e && *e != '0') ? 1 : 0;
|
|
}
|
|
if (s_verbose) {
|
|
fprintf(stderr, "[SS_TLS_HINT] HIT: class=%d ptr=%p ss=%p\n",
|
|
class_idx, ptr, (void*)hint);
|
|
}
|
|
#endif
|
|
return hint;
|
|
}
|
|
}
|
|
|
|
// Slow path: TLS hint miss, fallback to hash table
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
g_tls_hint_stats.hint_misses++;
|
|
|
|
static __thread int s_verbose = -1;
|
|
if (__builtin_expect(s_verbose == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_SS_TLS_HINT_TRACE");
|
|
s_verbose = (e && *e && *e != '0') ? 1 : 0;
|
|
}
|
|
if (s_verbose) {
|
|
fprintf(stderr, "[SS_TLS_HINT] MISS: class=%d ptr=%p (hint=%p)\n",
|
|
class_idx, ptr, (void*)hint);
|
|
}
|
|
#endif
|
|
|
|
// Lookup in hash table
|
|
struct SuperSlab* ss = ss_map_lookup(&g_ss_addr_map, ptr);
|
|
|
|
if (ss) {
|
|
// Update TLS hint for next time
|
|
g_tls_ss_hint[class_idx] = ss;
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
g_tls_hint_stats.hash_hits++;
|
|
if (s_verbose) {
|
|
fprintf(stderr, "[SS_TLS_HINT] HASH_HIT: class=%d ptr=%p ss=%p (hint updated)\n",
|
|
class_idx, ptr, (void*)ss);
|
|
}
|
|
#endif
|
|
} else {
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
g_tls_hint_stats.hash_misses++;
|
|
if (s_verbose) {
|
|
fprintf(stderr, "[SS_TLS_HINT] HASH_MISS: class=%d ptr=%p (not found)\n",
|
|
class_idx, ptr);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
return ss;
|
|
}
|
|
|
|
// Invalidate TLS hint for a specific class
|
|
// Call this when freeing a SuperSlab to prevent dangling pointer
|
|
static inline void ss_tls_hint_invalidate(int class_idx, struct SuperSlab* ss) {
|
|
if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
|
|
if (g_tls_ss_hint[class_idx] == ss) {
|
|
g_tls_ss_hint[class_idx] = NULL;
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
static __thread int s_verbose = -1;
|
|
if (__builtin_expect(s_verbose == -1, 0)) {
|
|
const char* e = getenv("HAKMEM_SS_TLS_HINT_TRACE");
|
|
s_verbose = (e && *e && *e != '0') ? 1 : 0;
|
|
}
|
|
if (s_verbose) {
|
|
fprintf(stderr, "[SS_TLS_HINT] INVALIDATE: class=%d ss=%p\n",
|
|
class_idx, (void*)ss);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
// Prefill TLS hint (for hot path optimization)
|
|
// Call after allocating from a SuperSlab to warm up cache
|
|
static inline void ss_tls_hint_update(int class_idx, struct SuperSlab* ss) {
|
|
if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES && ss != NULL) {
|
|
g_tls_ss_hint[class_idx] = ss;
|
|
}
|
|
}
|
|
|
|
#endif // HAK_BOX_SS_TLS_HINT_H
|