Files
hakmem/core/hakmem_internal.h
Moe Charm (CI) 707056b765 feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓

Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
  Result: +180-280% improvement, 85-146% of System malloc

Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)

Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
  Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
  Result: 50% → 95% stability (19/20 4T success)

Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
  Files: core/tiny_adaptive_sizing.c/h (new)

Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
  Files: core/hakmem_bigcache.c/h
  Expected: +10-20% cache hit rate

Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)

Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis

Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files

Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00

455 lines
16 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem_internal.h - Internal Implementation Helpers (static inline)
// Purpose: Separate implementation details from public API using zero-cost abstraction
//
// Design Philosophy:
// - All functions are `static inline` → Zero overhead (100% inlined with -O2)
// - Type-safe (unlike macros)
// - Debuggable (unlike macros)
// - Readable (unlike macros)
//
// This file should be #include'd by hakmem.c ONLY (not a public header)
#ifndef HAKMEM_INTERNAL_H
#define HAKMEM_INTERNAL_H
#include "hakmem.h"
#include "hakmem_config.h"
#include "hakmem_sys.h" // Phase 6.11.1: Syscall wrappers with timing
#include "hakmem_whale.h" // Phase 6.11.1: Whale fast-path cache
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h> // Phase 7: errno for OOM handling
#include <sys/mman.h> // For mincore, madvise
#include <unistd.h> // For sysconf
// Exposed runtime mode: set to 1 when loaded via LD_PRELOAD (libhakmem.so)
extern int g_ldpreload_mode;
// ============================================================================
// Phase 6.15 P0.1: Debug Logging Control
// ============================================================================
// Compile-time control: HAKMEM_DEBUG_VERBOSE (default OFF for performance)
// Runtime control: HAKMEM_QUIET environment variable (only for debug builds)
//
// Build modes:
// Release (default): make shared → No logs (HAKMEM_LOG compiled out)
// Debug: make debug → Logs enabled (unless HAKMEM_QUIET=1)
// Debug quiet: HAKMEM_QUIET=1 ... → Logs suppressed at runtime
#ifdef HAKMEM_DEBUG_VERBOSE
// Debug build: Check HAKMEM_QUIET at runtime
//
// HAKMEM_LOG(fmt, ...) prints "[hakmem] " followed by the printf-style
// message to stderr.
// - fmt must be a string literal: it is pasted onto the "[hakmem] " prefix.
// - Each expansion site owns its own pair of function-local statics, so the
//   HAKMEM_QUIET lookup (getenv + strcmp) runs once per call site, not once
//   per process.
// - Logging is suppressed only when HAKMEM_QUIET is exactly "1".
// - ##__VA_ARGS__ is the GNU comma-swallowing extension, which lets the
//   macro be used with no variadic arguments.
// NOTE: comments must stay above the #define; a // comment inside the
// backslash-continued body would swallow the rest of the macro.
#define HAKMEM_LOG(fmt, ...) do { \
static int quiet_checked = 0; \
static int quiet_mode = 0; \
if (!quiet_checked) { \
char* env = getenv("HAKMEM_QUIET"); \
quiet_mode = (env && strcmp(env, "1") == 0); \
quiet_checked = 1; \
} \
if (!quiet_mode) { \
fprintf(stderr, "[hakmem] " fmt, ##__VA_ARGS__); \
} \
} while(0)
#else
// Release build: Compile out all logs (zero overhead)
// The arguments are discarded by the preprocessor, so they are never
// evaluated; do not rely on side effects inside HAKMEM_LOG(...) arguments.
#define HAKMEM_LOG(fmt, ...) ((void)0)
#endif
// Linux-only: make sure every madvise() advice constant used in this header
// exists, even when the installed libc headers do not define it.
// Each fallback is only defined when the system header did not define it.
#ifdef __linux__
// Both headers are also included unconditionally near the top of this file;
// the repeat here keeps this section self-contained.
#include <sys/mman.h>
#include <unistd.h>
// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
#define MADV_FREE 8
#endif
// Fallback for MADV_DONTNEED if not defined (Linux usually defines 4)
#ifndef MADV_DONTNEED
#define MADV_DONTNEED 4
#endif
// THP support
// (advice values used by hak_apply_thp_policy to enable/disable huge pages)
#ifndef MADV_HUGEPAGE
#define MADV_HUGEPAGE 14
#endif
#ifndef MADV_NOHUGEPAGE
#define MADV_NOHUGEPAGE 15
#endif
#endif
// ===========================================================================
// Internal Constants
// ===========================================================================
// Value stored in AllocHeader.magic and compared by hak_header_validate().
#define HAKMEM_MAGIC 0x48414B4D // "HAKM" in ASCII (uint32_t)
// Bytes reserved in front of every user pointer for the AllocHeader.
// AllocHeader is declared further down; this works because the macro is only
// expanded at its points of use.
#define HEADER_SIZE sizeof(AllocHeader)
// THP thresholds (from config)
// Regions of at least this size get MADV_HUGEPAGE under the AUTO THP policy.
#define THP_THRESHOLD (2 * 1024 * 1024) // 2MB
// Thermal thresholds (from Phase 6.4 P1)
// Size boundaries used by hak_classify_thermal():
//   size >= COLD threshold -> COLD, size >= WARM threshold -> WARM, else HOT.
#define THERMAL_COLD_THRESHOLD (2 * 1024 * 1024) // 2MB
#define THERMAL_WARM_THRESHOLD (1 * 1024 * 1024) // 1MB
// ===========================================================================
// Internal Types
// ===========================================================================
// Identifies which backend produced an allocation. The value is stored in
// AllocHeader.method when the block is created.
typedef enum {
ALLOC_METHOD_MALLOC = 0, // libc malloc fallback (hak_alloc_malloc_impl)
ALLOC_METHOD_MMAP = 1, // mmap / whale cache (hak_alloc_mmap_impl)
ALLOC_METHOD_POOL = 2, // Phase 6.9.1: L2 Pool allocations (2-32KB)
ALLOC_METHOD_L25_POOL = 3, // Phase 6.13: L2.5 Pool allocations (64KB-1MB)
} AllocMethod;
// Per-allocation metadata placed immediately before the user pointer
// (user pointer = raw pointer + HEADER_SIZE).
typedef struct {
uint32_t magic; // Magic number for validation (HAKMEM_MAGIC when valid)
AllocMethod method; // Allocation method (see AllocMethod)
size_t size; // malloc path: requested user size; mmap path: page-aligned total size (for munmap)
uintptr_t alloc_site; // Call-site address (written via hak_header_set_site)
size_t class_bytes; // Size class for caching (0=no cache; written via hak_header_set_class)
uintptr_t owner_tid; // Owning thread (for Mid/Tiny per-thread fast path). 0 if unknown
} AllocHeader;
// Free-time "temperature" of a block, as produced by hak_classify_thermal()
// and consumed by hak_free_with_thermal_policy().
typedef enum {
FREE_THERMAL_HOT, // Reused soon -> do nothing (KEEP)
FREE_THERMAL_WARM, // Intermediate -> MADV_FREE (no munmap)
FREE_THERMAL_COLD // Unused for a long time -> batch (DONTNEED)
} FreeThermal;
// ===========================================================================
// Thermal Classification (Phase 6.4 P1)
// ===========================================================================
// Map an allocation size onto a free-time thermal class.
// Args: size - allocation size in bytes
// Returns: FREE_THERMAL_COLD, FREE_THERMAL_WARM or FREE_THERMAL_HOT
//
// Classification (thresholds are checked from largest to smallest):
// - size >= THERMAL_COLD_THRESHOLD (2MB) -> COLD: low reuse, batch DONTNEED
// - size >= THERMAL_WARM_THRESHOLD (1MB) -> WARM: medium reuse, MADV_FREE
// - anything smaller                      -> HOT: likely reused soon, keep mapped
//
// Used by FREE_POLICY_ADAPTIVE to optimize memory release strategy
static inline FreeThermal hak_classify_thermal(size_t size) {
    return (size >= THERMAL_COLD_THRESHOLD) ? FREE_THERMAL_COLD
         : (size >= THERMAL_WARM_THRESHOLD) ? FREE_THERMAL_WARM
                                            : FREE_THERMAL_HOT;
}
// ===========================================================================
// THP Policy Application (Phase 6.4 P4)
// ===========================================================================
// Tell the kernel whether a mapped region should use Transparent Huge Pages.
// Args: ptr - start of the mmap'd region (NULL is ignored)
// size - size of region in bytes
//
// The policy is read from g_hakem_config.thp_policy (set via the HAKMEM_THP
// environment variable):
// - THP_POLICY_OFF: MADV_NOHUGEPAGE for every region (disable THP)
// - THP_POLICY_ON: MADV_HUGEPAGE for every region (aggressive)
// - anything else (AUTO): MADV_HUGEPAGE when size >= THP_THRESHOLD (2MB),
//   MADV_NOHUGEPAGE otherwise (default, balanced)
// NOTE(review): earlier documentation described ON as "all >= 1MB"; the code
// applies no size filter for ON - confirm which is intended.
//
// Why THP: fewer TLB misses (2MB pages vs 4KB pages) and lower page table
// overhead for large regions.
//
// The madvise() result is deliberately ignored (best-effort hint).
// On non-Linux builds this function is a no-op.
static inline void hak_apply_thp_policy(void* ptr, size_t size) {
#ifdef __linux__
    if (ptr == NULL) {
        return; // Nothing to advise
    }

    const THPPolicy mode = g_hakem_config.thp_policy;
    int want_huge;
    if (mode == THP_POLICY_OFF) {
        want_huge = 0;
    } else if (mode == THP_POLICY_ON) {
        want_huge = 1;
    } else {
        // AUTO: only regions that can hold a full huge page opt in
        want_huge = (size >= THP_THRESHOLD);
    }

    madvise(ptr, size, want_huge ? MADV_HUGEPAGE : MADV_NOHUGEPAGE);
#else
    (void)ptr;
    (void)size;
#endif
}
// ===========================================================================
// Allocation Strategies (static inline = zero overhead)
// ===========================================================================
// Strategy 1: malloc (for small/medium allocations)
// Args: size - requested allocation size (user bytes, excluding header)
// Returns: User pointer (after header), or NULL on failure
//
// Implementation:
// - Allocates HEADER_SIZE + size using system malloc()
// - Writes AllocHeader with MALLOC method
// - Returns pointer after header (user-visible pointer)
// - O(1) allocation with kernel slab allocator (< 2MB)
static inline void* hak_alloc_malloc_impl(size_t size) {
// PHASE 7 CRITICAL FIX: malloc fallback removed (root cause of 4T crash)
//
// WHY: Mixed HAKMEM/libc allocations cause "free(): invalid pointer" crashes
// - libc malloc adds its own metadata (8-16B)
// - HAKMEM adds AllocHeader on top (16-32B total overhead!)
// - free() confusion leads to double-free/invalid pointer crashes
//
// SOLUTION: Return NULL explicitly to force OOM handling
// SuperSlab should dynamically scale instead of falling back
//
// To enable fallback for debugging ONLY (not for production!):
// export HAKMEM_ALLOW_MALLOC_FALLBACK=1
static int allow_fallback = -1;
if (allow_fallback < 0) {
char* env = getenv("HAKMEM_ALLOW_MALLOC_FALLBACK");
allow_fallback = (env && atoi(env) != 0) ? 1 : 0;
}
if (!allow_fallback) {
// Malloc fallback disabled (production mode)
static _Atomic int warn_count = 0;
int count = atomic_fetch_add(&warn_count, 1);
if (count < 3) {
fprintf(stderr, "[HAKMEM] WARNING: malloc fallback disabled (size=%zu), returning NULL (OOM)\n", size);
fprintf(stderr, "[HAKMEM] This may indicate SuperSlab exhaustion. Set HAKMEM_ALLOW_MALLOC_FALLBACK=1 to debug.\n");
}
errno = ENOMEM;
return NULL; // Explicit OOM
}
// Fallback path (DEBUGGING ONLY - should not be used in production!)
if (!HAK_ENABLED_ALLOC(HAKMEM_FEATURE_MALLOC)) {
return NULL; // malloc disabled
}
// Warn about fallback usage
static _Atomic int fallback_warn_count = 0;
int fb_count = atomic_fetch_add(&fallback_warn_count, 1);
if (fb_count < 3) {
fprintf(stderr, "[HAKMEM] DEBUG: Using libc malloc fallback (size=%zu) - NOT RECOMMENDED FOR PRODUCTION!\n", size);
}
// Allocate space for header + user data
// CRITICAL: Must use __libc_malloc to avoid infinite recursion through wrapper
extern void* __libc_malloc(size_t);
void* raw = __libc_malloc(HEADER_SIZE + size);
if (!raw) return NULL;
// Write header
AllocHeader* hdr = (AllocHeader*)raw;
hdr->magic = HAKMEM_MAGIC;
hdr->method = ALLOC_METHOD_MALLOC;
hdr->size = size;
hdr->alloc_site = 0; // Set by caller (hak_alloc_at)
hdr->class_bytes = 0; // Set by caller if cacheable
// Return user pointer (skip header)
return (char*)raw + HEADER_SIZE;
}
// Strategy 2: mmap (for large allocations)
// Args: size - requested allocation size (user bytes, excluding header)
// Returns: User pointer (after header), or NULL on failure
//
// Implementation:
// - Rounds up (HEADER_SIZE + size) to page boundary
// - Uses mmap(MAP_ANONYMOUS) for zero-overhead allocation
// - Applies THP policy (MADV_HUGEPAGE/NOHUGEPAGE)
// - Stores aligned_size in header->size (for munmap)
// - O(1) allocation with kernel buddy allocator (>= 2MB)
static inline void* hak_alloc_mmap_impl(size_t size) {
#ifdef __linux__
// Feature check
if (!HAK_ENABLED_ALLOC(HAKMEM_FEATURE_MMAP)) {
return NULL; // mmap disabled, fallback to malloc
}
// Round up to page size (header + user data)
long page_size = sysconf(_SC_PAGESIZE);
size_t total_size = HEADER_SIZE + size;
size_t aligned_size = (total_size + page_size - 1) & ~(page_size - 1);
// Phase 6.11.1: Try whale cache first (for ≥2MB allocations)
void* raw = hkm_whale_get(aligned_size);
if (!raw) {
// Whale cache miss: allocate via mmap
raw = hkm_sys_mmap(aligned_size);
if (!raw) {
return NULL;
}
}
// else: Whale cache hit! Reuse existing mapping (no mmap syscall)
// Apply THP policy (Phase 6.4 P4)
hak_apply_thp_policy(raw, aligned_size);
// Write header
AllocHeader* hdr = (AllocHeader*)raw;
hdr->magic = HAKMEM_MAGIC;
hdr->method = ALLOC_METHOD_MMAP;
hdr->size = aligned_size; // Store aligned size for munmap
hdr->alloc_site = 0; // Set by caller (hak_alloc_at)
hdr->class_bytes = 0; // Set by caller if cacheable
// Return user pointer (skip header)
return (char*)raw + HEADER_SIZE;
#else
// Fallback to malloc on non-Linux
return hak_alloc_malloc_impl(size);
#endif
}
// ===========================================================================
// Memory Safety Helpers
// ===========================================================================
// hak_is_memory_readable: Check whether the page containing addr is mapped
// before dereferencing it.
// CRITICAL FIX (2025-11-07): Prevents SEGV when checking header magic on unmapped memory
// Args: addr - any address (does NOT need to be page-aligned)
// Returns: 1 if the containing page is mapped, 0 otherwise
// (non-Linux builds always return 1)
//
// Alignment: mincore() rejects addresses that are not page-aligned (EINVAL),
// so addr is rounded down to the start of its page before the call. Without
// that, every unaligned address would be reported as unreadable.
// NOTE(review): mincore reports whether a page is mapped, not its protection;
// a mapped but PROT_NONE page would presumably still pass - confirm if that
// matters to callers.
//
// PERFORMANCE WARNING (Phase 7-1.3, 2025-11-08):
// This function is EXPENSIVE (~634 cycles via mincore syscall on Linux).
// DO NOT call this on every free() - use alignment check first to avoid overhead!
//
// Recommended Pattern (Hybrid Approach):
// if (((uintptr_t)ptr & 0xFFF) == 0) {
//     // Page boundary (0.1% case) - do safety check
//     if (!hak_is_memory_readable(ptr)) { /* handle page boundary */ }
// }
// // Normal case (99.9%): ptr is safe to read (no mincore call!)
//
// Performance Impact:
// - Without hybrid: 634 cycles on EVERY free
// - With hybrid: 1-2 cycles effective (99.9% x 1 + 0.1% x 634)
//
// See: PHASE7_DESIGN_REVIEW.md, Section 1.1 for full analysis
static inline int hak_is_memory_readable(void* addr) {
#ifdef __linux__
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        page_size = 4096; // sysconf failed; fall back to the common page size
    }

    // Round down to the page start: mincore() fails with EINVAL otherwise
    uintptr_t page_start = (uintptr_t)addr & ~((uintptr_t)page_size - 1);

    unsigned char vec;
    // mincore returns 0 if page is mapped, -1 (ENOMEM) if not
    // MEASURED COST: ~634 cycles (Phase 7-1.2 micro-benchmark)
    return mincore((void*)page_start, 1, &vec) == 0;
#else
    // Non-Linux: assume accessible (conservative fallback)
    // TODO: Add platform-specific checks for BSD, macOS, Windows
    (void)addr;
    return 1;
#endif
}
// ===========================================================================
// Header Helpers (with NULL safety)
// ===========================================================================
// Step back from a user pointer to the start of its raw allocation.
// Args: user_ptr - pointer previously handed to the user (may be NULL)
// Returns: Address HEADER_SIZE bytes before user_ptr (where the header
// starts), or NULL if user_ptr is NULL
static inline void* hak_header_get_raw(void* user_ptr) {
    return user_ptr ? (void*)((char*)user_ptr - HEADER_SIZE) : NULL;
}
// Get header from user pointer
// Returns: Pointer to AllocHeader, or NULL if user_ptr is NULL
static inline AllocHeader* hak_header_from_user(void* user_ptr) {
if (!user_ptr) return NULL;
return (AllocHeader*)hak_header_get_raw(user_ptr);
}
// Check that a header carries the HAKMEM magic number.
// Args: hdr - header to inspect (may be NULL)
// Returns: 1 if hdr is non-NULL and hdr->magic equals HAKMEM_MAGIC, else 0
static inline int hak_header_validate(AllocHeader* hdr) {
    return (hdr != NULL && hdr->magic == HAKMEM_MAGIC) ? 1 : 0;
}
// Record the allocation site in the header (for cache key).
// Args: user_ptr - pointer previously handed to the user (NULL is a no-op)
// site_id - value stored into the header's alloc_site field
static inline void hak_header_set_site(void* user_ptr, uintptr_t site_id) {
    AllocHeader* header = hak_header_from_user(user_ptr);
    if (header == NULL) {
        return;
    }
    header->alloc_site = site_id;
}
// Record the size class in the header (for BigCache).
// Args: user_ptr - pointer previously handed to the user (NULL is a no-op)
// class_bytes - value stored into the header's class_bytes field
static inline void hak_header_set_class(void* user_ptr, size_t class_bytes) {
    AllocHeader* header = hak_header_from_user(user_ptr);
    if (header == NULL) {
        return;
    }
    header->class_bytes = class_bytes;
}
// ===========================================================================
// Free Strategies (static inline = zero overhead)
// ===========================================================================
// Free malloc-allocated block
// Args: raw - pointer to raw allocation (including header); NULL is a no-op
//
// The block was obtained from __libc_malloc() in hak_alloc_malloc_impl(), so
// it is released with the matching __libc_free(). Calling plain free() could
// resolve to an interposed wrapper (e.g. under LD_PRELOAD), which would be
// handed a raw pointer rather than a user pointer.
static inline void hak_free_malloc_impl(void* raw) {
    if (!raw) return; // Safety check
    extern void __libc_free(void*);
    __libc_free(raw);
}
// Release a block that came from the mmap strategy.
// Args: raw - pointer to raw allocation (including header); NULL is a no-op
// size - aligned size (from header->size)
//
// Linux: the mapping is removed with munmap(raw, size); the result is not
// checked. Other platforms: the block is passed to free().
static inline void hak_free_mmap_impl(void* raw, size_t size) {
    if (raw != NULL) {
#ifdef __linux__
        munmap(raw, size);
#else
        (void)size;
        free(raw); // Fallback on non-Linux
#endif
    }
}
// Apply Hot/Warm/Cold free policy (Phase 6.4 P1)
// Args: raw - pointer to raw allocation (including header)
// size - allocated size
// thermal - thermal classification (HOT/WARM/COLD)
// Returns: 1 if handled (no further action needed), 0 if caller should continue (batch/direct free)
//
// Decision table (policy is read from g_hakem_config.free_policy):
// - raw == NULL            -> 1 (nothing to do)
// - FREE_POLICY_KEEP       -> 1 (mapping kept, no madvise)
// - FREE_POLICY_ADAPTIVE   -> HOT: 1 (kept); WARM: MADV_FREE on Linux, then 1;
//                             COLD or any other value: 0
// - any other policy (BATCH, the default) -> 0
static inline int hak_free_with_thermal_policy(void* raw, size_t size, FreeThermal thermal) {
    if (raw == NULL) {
        return 1; // NULL is always "handled" (no-op)
    }

    const FreePolicy mode = g_hakem_config.free_policy;

    if (mode == FREE_POLICY_KEEP) {
        // KEEP: do nothing (keep the VA range, no madvise either)
        return 1;
    }

    if (mode != FREE_POLICY_ADAPTIVE) {
        // FREE_POLICY_BATCH (default): caller handles
        return 0;
    }

    // ADAPTIVE: decide by Hot/Warm/Cold classification
    if (thermal == FREE_THERMAL_HOT) {
        // HOT (< 1MB): do nothing (will be reused soon)
        return 1;
    }
    if (thermal == FREE_THERMAL_WARM) {
        // WARM (1-2MB): MADV_FREE (no munmap; only physical pages are returned)
#ifdef __linux__
        madvise(raw, size, MADV_FREE);
#endif
        return 1;
    }

    // COLD (>= 2MB): left to the caller's existing batch processing
    return 0;
}
#endif // HAKMEM_INTERNAL_H