Files
hakmem/core/hakmem_tiny_metadata.inc
Moe Charm (CI) 9b0d746407 Phase 3d-B: TLS Cache Merge - Unified g_tls_sll[] structure (+12-18% expected)
Merge separate g_tls_sll_head[] and g_tls_sll_count[] arrays into unified
TinyTLSSLL struct to improve L1D cache locality. Expected performance gain:
+12-18% from reducing cache line splits (2 loads → 1 load per operation).

Changes:
- core/hakmem_tiny.h: Add TinyTLSSLL type (16B aligned, head+count+pad)
- core/hakmem_tiny.c: Replace separate arrays with g_tls_sll[8]
- core/box/tls_sll_box.h: Update Box API (13 sites) for unified access
- Updated 32+ files: All g_tls_sll_head[i] → g_tls_sll[i].head
- Updated 32+ files: All g_tls_sll_count[i] → g_tls_sll[i].count
- core/hakmem_tiny_integrity.h: Unified canary guards
- core/box/integrity_box.c: Simplified canary validation
- Makefile: Added core/box/tiny_sizeclass_hist_box.o to link

Build:  PASS (10K ops sanity test)
Warnings: Only pre-existing LTO type mismatches (unrelated)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 07:32:30 +09:00

228 lines
8.5 KiB
C++

// hakmem_tiny_metadata.inc
// Phase 6-1.6: Metadata-Based Fast Path (SKELETON - WIP)
//
// ⚠️ STATUS: Framework committed, full implementation pending
// ✅ DONE: Fast path alloc/free with metadata header
// ⏳ TODO: Refill logic to allocate +8 bytes for header
// ⏳ TODO: Slow path to initialize metadata
// ⏳ TODO: Mid/L25/Whale pool integration
//
// Design: Store 8-byte header before each allocation
// - pool_type: Which pool owns this (Tiny=0, Mid=1, L25=2, Whale=3)
// - size_class: Size class index (0-7 for Tiny)
// - magic: Validation (0xABCD)
// - alloc_size: Original requested size (for realloc/debugging)
//
// Memory overhead: ~6-12% for typical workloads
// Performance gain: 2x (eliminates alignment guessing overhead)
//
// Key advantage: Extends to ALL pools (Tiny/Mid/L25/Whale)
// - Eliminates hak_pool_mid_lookup() (12% CPU)
// - Eliminates registry lookups (10% CPU)
// - Enables instant pool type detection (1 load instruction)
#ifndef HAKMEM_TINY_METADATA_INC
#define HAKMEM_TINY_METADATA_INC
#include "box/tls_sll_box.h" // Box TLS-SLL API
// ============================================================================
// Phase 6-1.6: Universal Allocation Header
// ============================================================================
// Allocation header stored immediately in front of each user pointer
// (see hak_get_hdr() / hak_hdr_to_user()). Intended to be used by ALL pools so
// the owning pool can be identified from the pointer alone.
// The fields add up to 1 + 1 + 2 + 4 = 8 bytes; the packed attribute forbids
// padding between or after them.
struct hak_alloc_hdr {
uint8_t pool_type; // Owning pool, one of HAK_POOL_TYPE_*: 0=Tiny, 1=Mid, 2=L25, 3=Whale
uint8_t size_class; // Size class index within the owning pool
uint16_t magic; // Set to HAK_HDR_MAGIC (0xABCD) on allocation, checked on free
uint32_t alloc_size; // Original requested size in bytes (stored narrowed to 32 bits)
} __attribute__((packed));
// Value written to hak_alloc_hdr.magic on allocation. hak_tiny_free_metadata()
// hands the pointer to hak_free_at() when the stored value differs.
#define HAK_HDR_MAGIC 0xABCD
// Values for hak_alloc_hdr.pool_type. Only HAK_POOL_TYPE_TINY is written by
// this file; the other three are recognised on free but not yet handled
// specially.
#define HAK_POOL_TYPE_TINY 0
#define HAK_POOL_TYPE_MID 1
#define HAK_POOL_TYPE_L25 2
#define HAK_POOL_TYPE_WHALE 3
// Locate the metadata header that belongs to a user pointer.
// The header sits directly in front of the user data, so this steps back by
// exactly sizeof(struct hak_alloc_hdr) bytes. Nothing is validated here; the
// caller is responsible for checking the magic value.
static inline struct hak_alloc_hdr* hak_get_hdr(void* user_ptr) {
    char* raw = (char*)user_ptr;
    raw -= sizeof(struct hak_alloc_hdr);
    return (struct hak_alloc_hdr*)raw;
}
// Inverse of hak_get_hdr(): given a header, yield the address of the user data
// that starts immediately after it.
static inline void* hak_hdr_to_user(struct hak_alloc_hdr* hdr) {
    char* raw = (char*)hdr;
    raw += sizeof(struct hak_alloc_hdr);
    return (void*)raw;
}
// ============================================================================
// Phase 6-1.6.1: Tiny Pool Metadata Fast Path
// ============================================================================
// Forward declarations for external TLS variables and functions
// Phase 3d-B: TLS Cache Merge - Unified TLS SLL structure
// Thread-local array with TINY_NUM_CLASSES entries, declared extern here. This
// file does not index it directly; it goes through tls_sll_pop()/tls_sll_push().
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
// Thread-local one-shot flags. Within this file they are read and written only
// inside HAKMEM_DEBUG_VERBOSE blocks, to print each "path ACTIVE" banner once
// per thread.
static __thread int g_metadata_alloc_called = 0;
static __thread int g_metadata_free_called = 0;
// ============================================================================
// Ultra-Fast Metadata Alloc (Phase 6-1.6 style)
// ============================================================================
// Stamp a Tiny-pool metadata header onto a raw block popped from the TLS SLL.
// `block` is treated as the start of the header; the user data begins right
// after it. Returns the block viewed as a header.
static inline struct hak_alloc_hdr* hak_tiny_metadata_stamp_hdr(void* block, int class_idx, size_t size) {
    struct hak_alloc_hdr* hdr = (struct hak_alloc_hdr*)block;
    hdr->pool_type = HAK_POOL_TYPE_TINY;
    hdr->size_class = (uint8_t)class_idx;  // header field is 8 bits wide
    hdr->magic = HAK_HDR_MAGIC;
    hdr->alloc_size = (uint32_t)size;      // header field is 32 bits wide
    return hdr;
}

// Same as Phase 6-1.5 but stores a metadata header in front of the user data.
//
// Allocates `size` bytes from the Tiny pool. Order of attempts:
//   1. pop a block from the TLS SLL of the matching size class;
//   2. on a miss, refill that SLL from the SuperSlab layer and pop again;
//   3. otherwise defer to hak_tiny_alloc_slow().
//
// Returns NULL when hak_tiny_size_to_class() rejects the size; otherwise the
// user pointer (header + sizeof(struct hak_alloc_hdr)) or whatever the slow
// path returns.
//
// NOTE(review): the header is carved out of the popped block itself. As the
// refill TODO below states, blocks are currently class_size bytes, so the
// usable payload is sizeof(struct hak_alloc_hdr) bytes short of the class
// size until refill allocates the extra space.
// NOTE(review): the slow-path pointer is returned as-is; no header is written
// for it in this function, so hak_tiny_free_metadata() will inspect whatever
// bytes precede it. Confirm hak_tiny_alloc_slow() before enabling this path.
void* hak_tiny_alloc_metadata(size_t size) {
    // DEBUG: Mark that we're using metadata path (disabled in release)
#ifdef HAKMEM_DEBUG_VERBOSE
    if (!g_metadata_alloc_called) {
        fprintf(stderr, "[PHASE 6-1.6] Metadata-based path ACTIVE!\n");
        g_metadata_alloc_called = 1;
    }
#endif

    // 1. Size -> class (inline function, existing)
    int class_idx = hak_tiny_size_to_class(size);
    if (__builtin_expect(class_idx < 0, 0)) {
        return NULL; // >1KB
    }

    // 2. Ultra-fast path: pop from the existing TLS SLL.
    //    The SLL stores pointers to HEADERS, not user pointers.
    void* hdr_ptr = NULL;
    if (tls_sll_pop(class_idx, &hdr_ptr)) {
        void* user_ptr = hak_hdr_to_user(hak_tiny_metadata_stamp_hdr(hdr_ptr, class_idx, size));
        HAK_RET_ALLOC(class_idx, user_ptr);
    }

    // 3. Miss: refill from the existing SuperSlab infrastructure.
    // TODO (Phase 6-1.6.1): Modify refill to allocate HEADER_SIZE extra!
    //   Current sll_refill_small_from_ss() allocates class_size bytes,
    //   but the metadata version needs class_size + 8 bytes.
    const int refill_count = 64;
    // Evaluate the refill in each preprocessor arm so the braces below stay
    // balanced regardless of which arm is compiled.
#if HAKMEM_TINY_P0_BATCH_REFILL
    const int refilled = sll_refill_batch_from_ss(class_idx, refill_count) > 0;
#else
    const int refilled = sll_refill_small_from_ss(class_idx, refill_count) > 0;
#endif
    if (refilled && tls_sll_pop(class_idx, &hdr_ptr)) {
        void* user_ptr = hak_hdr_to_user(hak_tiny_metadata_stamp_hdr(hdr_ptr, class_idx, size));
        HAK_RET_ALLOC(class_idx, user_ptr);
    }

    // 4. Fallback to slow path
    // NOTE: Slow path needs to allocate +8 bytes and initialize header
    void* slow_ptr = hak_tiny_alloc_slow(size, class_idx);
    if (slow_ptr) {
        HAK_RET_ALLOC(class_idx, slow_ptr);
    }
    return slow_ptr;
}
// ============================================================================
// Ultra-Fast Metadata Free (Phase 6-1.6 style)
// ============================================================================
// INSTANT pool type detection via the metadata header stored in front of the
// user data.
//
// Frees a pointer by reading its struct hak_alloc_hdr:
//   - NULL is ignored (there is no header to read);
//   - a bad magic value, a non-Tiny pool type, or an out-of-range size class
//     sends the pointer to hak_free_at(user_ptr, 0, 0);
//   - otherwise the HEADER address is pushed onto the TLS SLL of its size
//     class, and hak_free_at() is used only if that push is refused.
void hak_tiny_free_metadata(void* user_ptr) {
    // Reading the header of a NULL pointer would dereference an invalid
    // address, so treat NULL as a no-op like free(NULL).
    if (__builtin_expect(!user_ptr, 0)) {
        return;
    }

    // DEBUG: Mark that we're using metadata free path (disabled in release)
#ifdef HAKMEM_DEBUG_VERBOSE
    if (!g_metadata_free_called) {
        fprintf(stderr, "[PHASE 6-1.6] Metadata-based FREE path ACTIVE!\n");
        g_metadata_free_called = 1;
    }
#endif

    // ========================================================================
    // ULTRA-FAST PATH: Load metadata header
    // ========================================================================
    struct hak_alloc_hdr* hdr = hak_get_hdr(user_ptr);

    // Validation: check magic. Invalid header -> generic free path.
    if (__builtin_expect(hdr->magic != HAK_HDR_MAGIC, 0)) {
#ifdef HAKMEM_DEBUG_VERBOSE
        fprintf(stderr, "[PHASE 6-1.6] WARNING: Invalid magic 0x%04x (expected 0x%04x)\n",
                (unsigned)hdr->magic, (unsigned)HAK_HDR_MAGIC);
#endif
        hak_free_at(user_ptr, 0, 0);
        return;
    }

    // Not Tiny -> delegate. Every non-Tiny pool currently ends up in
    // hak_free_at(); the switch only selects the diagnostic message.
    if (__builtin_expect(hdr->pool_type != HAK_POOL_TYPE_TINY, 0)) {
#ifdef HAKMEM_DEBUG_VERBOSE
        switch (hdr->pool_type) {
        case HAK_POOL_TYPE_MID:
            // TODO: hak_pool_mid_free_metadata(user_ptr, hdr);
            fprintf(stderr, "[PHASE 6-1.6] Mid pool free (not implemented yet)\n");
            break;
        case HAK_POOL_TYPE_L25:
            // TODO: hak_l25_pool_free_metadata(user_ptr, hdr);
            fprintf(stderr, "[PHASE 6-1.6] L25 pool free (not implemented yet)\n");
            break;
        case HAK_POOL_TYPE_WHALE:
            // TODO: Direct munmap
            fprintf(stderr, "[PHASE 6-1.6] Whale free (not implemented yet)\n");
            break;
        default:
            // Unknown pool type -> error
            fprintf(stderr, "[PHASE 6-1.6] ERROR: Unknown pool_type %d\n", hdr->pool_type);
            break;
        }
#endif
        hak_free_at(user_ptr, 0, 0);
        return;
    }

    // ========================================================================
    // TINY POOL FAST PATH: Direct TLS SLL push
    // ========================================================================
    int class_idx = hdr->size_class;

    // Bounds check. size_class is an unsigned 8-bit field, so only the upper
    // bound can be violated.
    if (__builtin_expect(class_idx >= TINY_NUM_CLASSES, 0)) {
#ifdef HAKMEM_DEBUG_VERBOSE
        fprintf(stderr, "[PHASE 6-1.6] ERROR: Invalid class_idx %d\n", class_idx);
#endif
        hak_free_at(user_ptr, 0, 0);
        return;
    }

    // Push HEADER pointer to SLL (not user pointer!)
    // Use Box TLS-SLL API (C7-safe)
    if (!tls_sll_push(class_idx, hdr, UINT32_MAX)) {
        // C7 rejected or capacity exceeded - use slow path
        hak_free_at(user_ptr, 0, 0);
        return;
    }
    // Done! No owner lookup, no registry, no locks!
}
#endif // HAKMEM_TINY_METADATA_INC