// hakmem_internal.h - Internal Implementation Helpers (static inline) // Purpose: Separate implementation details from public API using zero-cost abstraction // // Design Philosophy: // - All functions are `static inline` → Zero overhead (100% inlined with -O2) // - Type-safe (unlike macros) // - Debuggable (unlike macros) // - Readable (unlike macros) // // This file should be #include'd by hakmem.c ONLY (not a public header) #ifndef HAKMEM_INTERNAL_H #define HAKMEM_INTERNAL_H #include "hakmem.h" #include "hakmem_config.h" #include "hakmem_sys.h" // Phase 6.11.1: Syscall wrappers with timing #include "hakmem_whale.h" // Phase 6.11.1: Whale fast-path cache #include #include #include #include // Phase 7: errno for OOM handling #include // For mincore, madvise #include // For sysconf #include // Exposed runtime mode: set to 1 when loaded via LD_PRELOAD (libhakmem.so) extern int g_ldpreload_mode; // ============================================================================ // Phase 6.15 P0.1: Debug Logging Control // ============================================================================ // Compile-time control: HAKMEM_DEBUG_VERBOSE (default OFF for performance) // Runtime control: HAKMEM_QUIET environment variable (only for debug builds) // // Build modes: // Release (default): make shared → No logs (HAKMEM_LOG compiled out) // Debug: make debug → Logs enabled (unless HAKMEM_QUIET=1) // Debug quiet: HAKMEM_QUIET=1 ... → Logs suppressed at runtime #if HAKMEM_DEBUG_VERBOSE // Debug build: Check HAKMEM_QUIET at runtime #define HAKMEM_LOG(fmt, ...) do { \ static int quiet_checked = 0; \ static int quiet_mode = 0; \ if (!quiet_checked) { \ char* env = getenv("HAKMEM_QUIET"); \ quiet_mode = (env && strcmp(env, "1") == 0); \ quiet_checked = 1; \ } \ if (!quiet_mode) { \ fprintf(stderr, "[hakmem] " fmt, ##__VA_ARGS__); \ } \ } while(0) #else // Release build: Compile out all logs (zero overhead) #define HAKMEM_LOG(fmt, ...) ((void)0) #endif #ifdef __linux__ #include #include // MADV_FREE support (Linux kernel 4.5+) #ifndef MADV_FREE #define MADV_FREE 8 #endif // Fallback for MADV_DONTNEED if not defined (Linux usually defines 4) #ifndef MADV_DONTNEED #define MADV_DONTNEED 4 #endif // THP support #ifndef MADV_HUGEPAGE #define MADV_HUGEPAGE 14 #endif #ifndef MADV_NOHUGEPAGE #define MADV_NOHUGEPAGE 15 #endif #endif // =========================================================================== // Internal Constants // =========================================================================== #define HAKMEM_MAGIC 0x48414B4D // "HAKM" in ASCII (uint32_t) #define HEADER_SIZE sizeof(AllocHeader) #define HAKMEM_FG_GUARD_BYTE 0x5Au // Ensure front gate never misclassifies mid/large as Tiny // THP thresholds (from config) #define THP_THRESHOLD (2 * 1024 * 1024) // 2MB // Thermal thresholds (from Phase 6.4 P1) #define THERMAL_COLD_THRESHOLD (2 * 1024 * 1024) // 2MB #define THERMAL_WARM_THRESHOLD (1 * 1024 * 1024) // 1MB // =========================================================================== // Internal Types // =========================================================================== typedef enum { ALLOC_METHOD_MALLOC = 0, ALLOC_METHOD_MMAP = 1, ALLOC_METHOD_POOL = 2, // Phase 6.9.1: L2 Pool allocations (2-32KB) ALLOC_METHOD_L25_POOL = 3, // Phase 6.13: L2.5 Pool allocations (64KB-1MB) } AllocMethod; typedef struct { uint32_t magic; // Magic number for validation AllocMethod method; // Allocation method (malloc/mmap) size_t size; // Original size (for munmap) uintptr_t alloc_site; // Call-site address size_t class_bytes; // Size class for caching (0=no cache) uintptr_t owner_tid; // Owning thread (for Mid/Tiny per-thread fast path). 0 if unknown } AllocHeader; // Phase 10: Pointer Type Safety #include "box/ptr_type_box.h" typedef enum { FREE_THERMAL_HOT, // すぐ再利用 → 何もしない(KEEP) FREE_THERMAL_WARM, // 中間 → MADV_FREE(munmapしない) FREE_THERMAL_COLD // 長期未使用 → batch(DONTNEED) } FreeThermal; // =========================================================================== // Thermal Classification (Phase 6.4 P1) // =========================================================================== // Classify allocation thermal state based on size // Args: size - allocation size in bytes // Returns: FreeThermal enum (HOT/WARM/COLD) // // Thermal States: // - HOT (< 1MB): Likely to be reused soon → keep VA mapped // - WARM (1-2MB): Medium reuse → MADV_FREE (return physical pages only) // - COLD (>= 2MB): Low reuse → batch DONTNEED (return VA + physical) // // Used by FREE_POLICY_ADAPTIVE to optimize memory release strategy static inline FreeThermal hak_classify_thermal(size_t size) { if (size >= THERMAL_COLD_THRESHOLD) { return FREE_THERMAL_COLD; // >= 2MB → COLD } else if (size >= THERMAL_WARM_THRESHOLD) { return FREE_THERMAL_WARM; // 1MB-2MB → WARM } else { return FREE_THERMAL_HOT; // < 1MB → HOT } } // =========================================================================== // THP Policy Application (Phase 6.4 P4) // =========================================================================== // Apply Transparent Huge Pages (THP) policy to mmap'd region // Args: ptr - pointer to mmap'd memory region // size - size of region in bytes // // THP Policies: // - THP_POLICY_OFF: MADV_NOHUGEPAGE for all (disable THP) // - THP_POLICY_AUTO: MADV_HUGEPAGE for >= 2MB only (default, balanced) // - THP_POLICY_ON: MADV_HUGEPAGE for all >= 1MB (aggressive) // // Benefits of THP: // - Reduced TLB misses (2MB pages vs 4KB pages = 512x reduction) // - Improved cache locality // - Lower page table overhead // // Set via HAKMEM_THP environment variable static inline void hak_apply_thp_policy(void* ptr, size_t size) { #ifdef __linux__ if (!ptr) return; // Safety check THPPolicy policy = g_hakem_config.thp_policy; if (policy == THP_POLICY_OFF) { madvise(ptr, size, MADV_NOHUGEPAGE); } else if (policy == THP_POLICY_ON) { madvise(ptr, size, MADV_HUGEPAGE); } else { // AUTO if (size >= THP_THRESHOLD) { madvise(ptr, size, MADV_HUGEPAGE); // >= 2MB → THP } else { madvise(ptr, size, MADV_NOHUGEPAGE); // < 2MB → no THP } } #else (void)ptr; (void)size; #endif } // =========================================================================== // Allocation Strategies (static inline = zero overhead) // =========================================================================== // Strategy 1: malloc (for small/medium allocations) // Args: size - requested allocation size (user bytes, excluding header) // Returns: User pointer (after header), or NULL on failure // // Implementation: // - Allocates HEADER_SIZE + size using system malloc() // - Writes AllocHeader with MALLOC method // - Returns pointer after header (user-visible pointer) // - O(1) allocation with kernel slab allocator (< 2MB) static inline void* hak_alloc_malloc_impl(size_t size) { // PHASE 7 CRITICAL FIX: malloc fallback removed (root cause of 4T crash) // // WHY: Mixed HAKMEM/libc allocations cause "free(): invalid pointer" crashes // - libc malloc adds its own metadata (8-16B) // - HAKMEM adds AllocHeader on top (16-32B total overhead!) // - free() confusion leads to double-free/invalid pointer crashes // // SOLUTION: Return NULL explicitly to force OOM handling // SuperSlab should dynamically scale instead of falling back // // To enable fallback for debugging ONLY (not for production!): // export HAKMEM_ALLOW_MALLOC_FALLBACK=1 static int allow_fallback = -1; if (allow_fallback < 0) { char* env = getenv("HAKMEM_ALLOW_MALLOC_FALLBACK"); allow_fallback = (env && atoi(env) != 0) ? 1 : 0; } if (!allow_fallback) { // Malloc fallback disabled (production mode) static _Atomic int warn_count = 0; int count = atomic_fetch_add(&warn_count, 1); if (count < 3) { fprintf(stderr, "[HAKMEM] WARNING: malloc fallback disabled (size=%zu), returning NULL (OOM)\n", size); fprintf(stderr, "[HAKMEM] This may indicate SuperSlab exhaustion. Set HAKMEM_ALLOW_MALLOC_FALLBACK=1 to debug.\n"); } errno = ENOMEM; return NULL; // Explicit OOM } // Fallback path (DEBUGGING ONLY - should not be used in production!) if (!HAK_ENABLED_ALLOC(HAKMEM_FEATURE_MALLOC)) { return NULL; // malloc disabled } // Warn about fallback usage static _Atomic int fallback_warn_count = 0; int fb_count = atomic_fetch_add(&fallback_warn_count, 1); if (fb_count < 3) { fprintf(stderr, "[HAKMEM] DEBUG: Using libc malloc fallback (size=%zu) - NOT RECOMMENDED FOR PRODUCTION!\n", size); } // Allocate space for header + user data // CRITICAL: Must use __libc_malloc to avoid infinite recursion through wrapper extern void* __libc_malloc(size_t); void* raw = __libc_malloc(HEADER_SIZE + size); if (!raw) return NULL; // Write header AllocHeader* hdr = (AllocHeader*)raw; hdr->magic = HAKMEM_MAGIC; hdr->method = ALLOC_METHOD_MALLOC; hdr->size = size; hdr->alloc_site = 0; // Set by caller (hak_alloc_at) hdr->class_bytes = 0; // Set by caller if cacheable // Return user pointer (skip header) return (char*)raw + HEADER_SIZE; } // Strategy 2: mmap (for large allocations) // Args: size - requested allocation size (user bytes, excluding header) // Returns: User pointer (after header), or NULL on failure // // Implementation: // - Rounds up (HEADER_SIZE + size) to page boundary // - Uses mmap(MAP_ANONYMOUS) for zero-overhead allocation // - Applies THP policy (MADV_HUGEPAGE/NOHUGEPAGE) // - Stores aligned_size in header->size (for munmap) // - O(1) allocation with kernel buddy allocator (>= 2MB) static inline void* hak_alloc_mmap_impl(size_t size) { #ifdef __linux__ // Feature check if (!HAK_ENABLED_ALLOC(HAKMEM_FEATURE_MMAP)) { return NULL; // mmap disabled, fallback to malloc } // Round up to page size (header + user data) long page_size = sysconf(_SC_PAGESIZE); size_t total_size = HEADER_SIZE + size; size_t aligned_size = (total_size + page_size - 1) & ~(page_size - 1); // Phase 6.11.1: Try whale cache first (for ≥2MB allocations) void* raw = hkm_whale_get(aligned_size); if (!raw) { // Whale cache miss: allocate via mmap raw = hkm_sys_mmap(aligned_size); if (!raw) { return NULL; } } // else: Whale cache hit! Reuse existing mapping (no mmap syscall) // Apply THP policy (Phase 6.4 P4) hak_apply_thp_policy(raw, aligned_size); // Write header AllocHeader* hdr = (AllocHeader*)raw; hdr->magic = HAKMEM_MAGIC; hdr->method = ALLOC_METHOD_MMAP; hdr->size = aligned_size; // Store aligned size for munmap hdr->alloc_site = 0; // Set by caller (hak_alloc_at) hdr->class_bytes = 0; // Set by caller if cacheable // Return user pointer (skip header) return (char*)raw + HEADER_SIZE; #else // Fallback to malloc on non-Linux return hak_alloc_malloc_impl(size); #endif } // =========================================================================== // Memory Safety Helpers // =========================================================================== // hak_is_memory_readable: Check if memory address is accessible before dereferencing // PHASE 9: mincore() REMOVED - Use internal metadata instead // // OLD DESIGN (Phase 7): // - Used mincore() syscall (~634 cycles) // - Hybrid optimization: only check page boundaries (99.9% avoid syscall) // // NEW DESIGN (Phase 9 - Lazy Deallocation): // - NO syscall overhead (0 cycles) // - Trust internal metadata (SuperSlab registry + header magic) // - SuperSlabs tracked in registry → if lookup succeeds, memory is valid // - Headers contain magic → validate before dereferencing // // Performance Impact: // - OLD: 1-2 cycles effective (99.9% × 1 + 0.1% × 634) // - NEW: 0 cycles (function removed, callers use registry lookup) // - Syscall reduction: 841 mincore calls → 0 (100% elimination) // // Migration: All callers should use hak_super_lookup() instead static inline int hak_is_memory_readable(void* addr) { (void)addr; // Phase 9: Removed mincore() - assume valid (registry ensures safety) // Callers should use hak_super_lookup() for validation return 1; // Always return true (trust internal metadata) } // =========================================================================== // Header Helpers (with NULL safety) // =========================================================================== // Get raw pointer (before header) from user pointer // Returns: Raw allocation pointer (header starts here) static inline void* hak_header_get_raw(void* user_ptr) { if (!user_ptr) return NULL; return (char*)user_ptr - HEADER_SIZE; } // Get header from user pointer // Returns: Pointer to AllocHeader, or NULL if user_ptr is NULL static inline AllocHeader* hak_header_from_user(void* user_ptr) { if (!user_ptr) return NULL; return (AllocHeader*)hak_header_get_raw(user_ptr); } // Validate header magic number // Returns: 1 if valid, 0 if invalid or NULL static inline int hak_header_validate(AllocHeader* hdr) { if (!hdr) return 0; return hdr->magic == HAKMEM_MAGIC; } // Set allocation site in header (for cache key) static inline void hak_header_set_site(void* user_ptr, uintptr_t site_id) { AllocHeader* hdr = hak_header_from_user(user_ptr); if (hdr) { hdr->alloc_site = site_id; } } // Set size class in header (for BigCache) static inline void hak_header_set_class(void* user_ptr, size_t class_bytes) { AllocHeader* hdr = hak_header_from_user(user_ptr); if (hdr) { hdr->class_bytes = class_bytes; } } // =========================================================================== // Free Strategies (static inline = zero overhead) // =========================================================================== // Free malloc-allocated block // Args: raw - pointer to raw allocation (including header) static inline void hak_free_malloc_impl(void* raw) { if (!raw) return; // Safety check extern void __libc_free(void*); __libc_free(raw); } // Free mmap-allocated block // Args: raw - pointer to raw allocation (including header) // size - aligned size (from header->size) static inline void hak_free_mmap_impl(void* raw, size_t size) { if (!raw) return; // Safety check #ifdef __linux__ munmap(raw, size); #else extern void __libc_free(void*); __libc_free(raw); // Fallback on non-Linux #endif } // Apply Hot/Warm/Cold free policy (Phase 6.4 P1) // Args: raw - pointer to raw allocation (including header) // size - allocated size // thermal - thermal classification (HOT/WARM/COLD) // Returns: 1 if handled (no further action needed), 0 if caller should continue (batch/direct free) static inline int hak_free_with_thermal_policy(void* raw, size_t size, FreeThermal thermal) { if (!raw) return 1; // NULL is always "handled" (no-op) FreePolicy policy = g_hakem_config.free_policy; if (policy == FREE_POLICY_KEEP) { // KEEP: 何もしない(VA保持、madviseもしない) return 1; // Handled (kept) } else if (policy == FREE_POLICY_ADAPTIVE) { // ADAPTIVE: Hot/Warm/Cold判定 switch (thermal) { case FREE_THERMAL_HOT: // HOT (< 1MB): 何もしない(すぐ再利用される) return 1; // Handled (kept) case FREE_THERMAL_WARM: // WARM (1-2MB): MADV_FREE(munmapしない、物理ページのみ返す) #ifdef __linux__ madvise(raw, size, MADV_FREE); #endif return 1; // Handled case FREE_THERMAL_COLD: // COLD (>= 2MB): batch(既存の処理) return 0; // Not handled, caller should use batch } } // FREE_POLICY_BATCH (default): caller handles return 0; // Not handled } #endif // HAKMEM_INTERNAL_H