// hakmem/core/hakmem_internal.h

// hakmem_internal.h - Internal Implementation Helpers (static inline)
// Purpose: Separate implementation details from public API using zero-cost abstraction
//
// Design Philosophy:
// - All functions are `static inline` → Zero overhead (100% inlined with -O2)
// - Type-safe (unlike macros)
// - Debuggable (unlike macros)
// - Readable (unlike macros)
//
// This file should be #include'd by hakmem.c ONLY (not a public header)

#ifndef HAKMEM_INTERNAL_H
#define HAKMEM_INTERNAL_H

#include "hakmem.h"
#include "hakmem_config.h"
#include "hakmem_sys.h"        // Phase 6.11.1: Syscall wrappers with timing
#include "hakmem_whale.h"      // Phase 6.11.1: Whale fast-path cache
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>             // Phase 7: errno for OOM handling
#include <sys/mman.h>          // For mincore, madvise
#include <unistd.h>            // For sysconf

// Exposed runtime mode: set to 1 when loaded via LD_PRELOAD (libhakmem.so)
extern int g_ldpreload_mode;

// ============================================================================
// Phase 6.15 P0.1: Debug Logging Control
// ============================================================================

// Compile-time control: HAKMEM_DEBUG_VERBOSE (default OFF for performance)
// Runtime control: HAKMEM_QUIET environment variable (only for debug builds)
//
// Build modes:
//   Release (default): make shared          → No logs (HAKMEM_LOG compiled out)
//   Debug:             make debug           → Logs enabled (unless HAKMEM_QUIET=1)
//   Debug quiet:       HAKMEM_QUIET=1 ...   → Logs suppressed at runtime

#ifdef HAKMEM_DEBUG_VERBOSE
  // Debug build: printf-style logging to stderr, prefixed with "[hakmem] ".
  //
  // Notes:
  // - `fmt` must be a string literal: it is pasted after the "[hakmem] "
  //   prefix by literal concatenation.
  // - The two static flags are declared inside the do-block, so every
  //   expansion site gets its own pair and performs its own getenv() lookup
  //   the first time that site executes.
  // - Only the exact value "1" in HAKMEM_QUIET silences output.
  // - The flags are plain ints with no synchronization.
  // - `##__VA_ARGS__` is a GNU extension (drops the comma when no varargs).
  #define HAKMEM_LOG(fmt, ...) do { \
      static int quiet_checked = 0; \
      static int quiet_mode = 0; \
      if (!quiet_checked) { \
          char* env = getenv("HAKMEM_QUIET"); \
          quiet_mode = (env && strcmp(env, "1") == 0); \
          quiet_checked = 1; \
      } \
      if (!quiet_mode) { \
          fprintf(stderr, "[hakmem] " fmt, ##__VA_ARGS__); \
      } \
  } while(0)
#else
  // Release build: expands to a no-op expression; the arguments are discarded
  // by the preprocessor and therefore never evaluated.
  #define HAKMEM_LOG(fmt, ...) ((void)0)
#endif

// Linux-only: fallback definitions for madvise() advice values, used only when
// the system headers do not already provide them.
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>

// MADV_FREE support (Linux kernel 4.5+)
#ifndef MADV_FREE
  #define MADV_FREE 8
#endif
// Fallback for MADV_DONTNEED if not defined (Linux usually defines 4)
#ifndef MADV_DONTNEED
  #define MADV_DONTNEED 4
#endif

// Transparent Huge Pages (THP) advice values, used by hak_apply_thp_policy()
#ifndef MADV_HUGEPAGE
  #define MADV_HUGEPAGE 14
#endif
#ifndef MADV_NOHUGEPAGE
  #define MADV_NOHUGEPAGE 15
#endif
#endif

// ===========================================================================
// Internal Constants
// ===========================================================================

// Value stored in AllocHeader.magic and compared by hak_header_validate().
#define HAKMEM_MAGIC 0x48414B4D  // "HAKM" in ASCII (uint32_t)
// Bytes occupied by the header that precedes every user pointer.
#define HEADER_SIZE sizeof(AllocHeader)

// THP threshold used by the AUTO policy in hak_apply_thp_policy().
// NOTE(review): hard-coded here; confirm whether it is meant to track a config value.
#define THP_THRESHOLD (2 * 1024 * 1024)  // 2MB

// Thermal thresholds (from Phase 6.4 P1), consumed by hak_classify_thermal()
#define THERMAL_COLD_THRESHOLD  (2 * 1024 * 1024)   // 2MB: sizes >= this are COLD
#define THERMAL_WARM_THRESHOLD  (1 * 1024 * 1024)   // 1MB: sizes >= this (and < COLD) are WARM

// ===========================================================================
// Internal Types
// ===========================================================================

// Identifies which allocator produced a block; stored in AllocHeader.method.
// The numeric values are explicit, so they must stay stable.
typedef enum {
    ALLOC_METHOD_MALLOC = 0,   // Written by hak_alloc_malloc_impl()
    ALLOC_METHOD_MMAP = 1,     // Written by hak_alloc_mmap_impl()
    ALLOC_METHOD_POOL = 2,     // Phase 6.9.1: L2 Pool allocations (2-32KB)
    ALLOC_METHOD_L25_POOL = 3, // Phase 6.13: L2.5 Pool allocations (64KB-1MB)
} AllocMethod;

// Per-allocation header placed immediately before the user pointer
// (user pointer = raw pointer + HEADER_SIZE).
typedef struct {
    uint32_t    magic;        // HAKMEM_MAGIC when valid (see hak_header_validate)
    AllocMethod method;       // Allocator that produced the block (see AllocMethod)
    size_t      size;         // MALLOC: requested user size; MMAP: page-aligned total
                              // mapping size including the header (used for munmap)
    uintptr_t   alloc_site;   // Call-site address (set via hak_header_set_site)
    size_t      class_bytes;  // Size class for caching (0=no cache; set via hak_header_set_class)
    uintptr_t   owner_tid;    // Owning thread (for Mid/Tiny per-thread fast path). 0 if unknown
} AllocHeader;

// Thermal class of a block at free time (produced by hak_classify_thermal,
// consumed by hak_free_with_thermal_policy).
typedef enum {
    FREE_THERMAL_HOT,       // Reused soon → do nothing (KEEP)
    FREE_THERMAL_WARM,      // Intermediate → MADV_FREE (no munmap)
    FREE_THERMAL_COLD       // Unused for a long time → batch (DONTNEED)
} FreeThermal;

// ===========================================================================
// Thermal Classification (Phase 6.4 P1)
// ===========================================================================

// Map an allocation size to its thermal class.
// Args: size - allocation size in bytes
// Returns: FreeThermal enum (HOT/WARM/COLD)
//
// Size bands:
// - size <  THERMAL_WARM_THRESHOLD (1MB)        → FREE_THERMAL_HOT
// - THERMAL_WARM_THRESHOLD <= size < COLD (2MB) → FREE_THERMAL_WARM
// - size >= THERMAL_COLD_THRESHOLD (2MB)        → FREE_THERMAL_COLD
//
// The result feeds hak_free_with_thermal_policy(), which under
// FREE_POLICY_ADAPTIVE keeps HOT blocks, applies MADV_FREE to WARM blocks and
// hands COLD blocks back to the caller for batch release.
static inline FreeThermal hak_classify_thermal(size_t size) {
    // Walk the bands from smallest to largest.
    if (size < THERMAL_WARM_THRESHOLD) {
        return FREE_THERMAL_HOT;
    }
    if (size < THERMAL_COLD_THRESHOLD) {
        return FREE_THERMAL_WARM;
    }
    return FREE_THERMAL_COLD;
}

// ===========================================================================
// THP Policy Application (Phase 6.4 P4)
// ===========================================================================

// Advise the kernel about Transparent Huge Pages (THP) for a mapped region.
// Args: ptr  - start of the mmap'd region (NULL is ignored)
//       size - length of the region in bytes
//
// Advice chosen from g_hakem_config.thp_policy:
// - THP_POLICY_OFF:        MADV_NOHUGEPAGE, regardless of size
// - THP_POLICY_ON:         MADV_HUGEPAGE, regardless of size
// - anything else (AUTO):  MADV_HUGEPAGE when size >= THP_THRESHOLD (2MB),
//                          otherwise MADV_NOHUGEPAGE
//
// NOTE(review): earlier documentation described ON as "all >= 1MB"; the code
// applies it to every size - confirm which is intended.
//
// The madvise() result is ignored, so the hint is best-effort.
// On non-Linux builds this function does nothing.
//
// Motivation (from the original design notes): huge pages reduce TLB misses
// and page-table overhead for large regions.
static inline void hak_apply_thp_policy(void* ptr, size_t size) {
#ifdef __linux__
    if (ptr == NULL) {
        return;  // Nothing to advise
    }

    const THPPolicy mode = g_hakem_config.thp_policy;
    int advice;

    if (mode == THP_POLICY_OFF) {
        advice = MADV_NOHUGEPAGE;
    } else if (mode == THP_POLICY_ON) {
        advice = MADV_HUGEPAGE;
    } else {
        // AUTO: only regions of at least THP_THRESHOLD get huge pages
        advice = (size >= THP_THRESHOLD) ? MADV_HUGEPAGE : MADV_NOHUGEPAGE;
    }

    madvise(ptr, size, advice);
#else
    (void)ptr;
    (void)size;
#endif
}

// ===========================================================================
// Allocation Strategies (static inline = zero overhead)
// ===========================================================================

// Strategy 1: malloc (for small/medium allocations)
// Args: size - requested allocation size (user bytes, excluding header)
// Returns: User pointer (after header), or NULL on failure
//
// Implementation:
// - Allocates HEADER_SIZE + size using system malloc()
// - Writes AllocHeader with MALLOC method
// - Returns pointer after header (user-visible pointer)
// - O(1) allocation with kernel slab allocator (< 2MB)
static inline void* hak_alloc_malloc_impl(size_t size) {
    // PHASE 7 CRITICAL FIX: malloc fallback removed (root cause of 4T crash)
    //
    // WHY: Mixed HAKMEM/libc allocations cause "free(): invalid pointer" crashes
    //      - libc malloc adds its own metadata (8-16B)
    //      - HAKMEM adds AllocHeader on top (16-32B total overhead!)
    //      - free() confusion leads to double-free/invalid pointer crashes
    //
    // SOLUTION: Return NULL explicitly to force OOM handling
    //           SuperSlab should dynamically scale instead of falling back
    //
    // To enable fallback for debugging ONLY (not for production!):
    //   export HAKMEM_ALLOW_MALLOC_FALLBACK=1

    static int allow_fallback = -1;
    if (allow_fallback < 0) {
        char* env = getenv("HAKMEM_ALLOW_MALLOC_FALLBACK");
        allow_fallback = (env && atoi(env) != 0) ? 1 : 0;
    }

    if (!allow_fallback) {
        // Malloc fallback disabled (production mode)
        static _Atomic int warn_count = 0;
        int count = atomic_fetch_add(&warn_count, 1);
        if (count < 3) {
            fprintf(stderr, "[HAKMEM] WARNING: malloc fallback disabled (size=%zu), returning NULL (OOM)\n", size);
            fprintf(stderr, "[HAKMEM]          This may indicate SuperSlab exhaustion. Set HAKMEM_ALLOW_MALLOC_FALLBACK=1 to debug.\n");
        }
        errno = ENOMEM;
        return NULL;  // Explicit OOM
    }

    // Fallback path (DEBUGGING ONLY - should not be used in production!)
    if (!HAK_ENABLED_ALLOC(HAKMEM_FEATURE_MALLOC)) {
        return NULL;  // malloc disabled
    }

    // Warn about fallback usage
    static _Atomic int fallback_warn_count = 0;
    int fb_count = atomic_fetch_add(&fallback_warn_count, 1);
    if (fb_count < 3) {
        fprintf(stderr, "[HAKMEM] DEBUG: Using libc malloc fallback (size=%zu) - NOT RECOMMENDED FOR PRODUCTION!\n", size);
    }

    // Allocate space for header + user data
    // CRITICAL: Must use __libc_malloc to avoid infinite recursion through wrapper
    extern void* __libc_malloc(size_t);
    void* raw = __libc_malloc(HEADER_SIZE + size);
    if (!raw) return NULL;

    // Write header
    AllocHeader* hdr = (AllocHeader*)raw;
    hdr->magic = HAKMEM_MAGIC;
    hdr->method = ALLOC_METHOD_MALLOC;
    hdr->size = size;
    hdr->alloc_site = 0;      // Set by caller (hak_alloc_at)
    hdr->class_bytes = 0;     // Set by caller if cacheable

    // Return user pointer (skip header)
    return (char*)raw + HEADER_SIZE;
}

// Strategy 2: mmap (for large allocations)
// Args: size - requested allocation size (user bytes, excluding header)
// Returns: User pointer (after header), or NULL on failure
//
// Implementation:
// - Rounds up (HEADER_SIZE + size) to page boundary
// - Uses mmap(MAP_ANONYMOUS) for zero-overhead allocation
// - Applies THP policy (MADV_HUGEPAGE/NOHUGEPAGE)
// - Stores aligned_size in header->size (for munmap)
// - O(1) allocation with kernel buddy allocator (>= 2MB)
static inline void* hak_alloc_mmap_impl(size_t size) {
#ifdef __linux__
    // Feature check
    if (!HAK_ENABLED_ALLOC(HAKMEM_FEATURE_MMAP)) {
        return NULL;  // mmap disabled, fallback to malloc
    }

    // Round up to page size (header + user data)
    long page_size = sysconf(_SC_PAGESIZE);
    size_t total_size = HEADER_SIZE + size;
    size_t aligned_size = (total_size + page_size - 1) & ~(page_size - 1);

    // Phase 6.11.1: Try whale cache first (for ≥2MB allocations)
    void* raw = hkm_whale_get(aligned_size);

    if (!raw) {
        // Whale cache miss: allocate via mmap
        raw = hkm_sys_mmap(aligned_size);
        if (!raw) {
            return NULL;
        }
    }
    // else: Whale cache hit! Reuse existing mapping (no mmap syscall)

    // Apply THP policy (Phase 6.4 P4)
    hak_apply_thp_policy(raw, aligned_size);

    // Write header
    AllocHeader* hdr = (AllocHeader*)raw;
    hdr->magic = HAKMEM_MAGIC;
    hdr->method = ALLOC_METHOD_MMAP;
    hdr->size = aligned_size;  // Store aligned size for munmap
    hdr->alloc_site = 0;       // Set by caller (hak_alloc_at)
    hdr->class_bytes = 0;      // Set by caller if cacheable

    // Return user pointer (skip header)
    return (char*)raw + HEADER_SIZE;
#else
    // Fallback to malloc on non-Linux
    return hak_alloc_malloc_impl(size);
#endif
}

// ===========================================================================
// Memory Safety Helpers
// ===========================================================================

// hak_is_memory_readable: Check whether the page containing `addr` is mapped
// before dereferencing it.
// CRITICAL FIX (2025-11-07): Prevents SEGV when checking header magic on unmapped memory
//
// Args: addr - any address (does not need to be page-aligned)
// Returns: 1 if the containing page is mapped, 0 otherwise
//          (non-Linux builds always return 1)
//
// Implementation note:
//   mincore() rejects addresses that are not a multiple of the page size
//   (EINVAL), so `addr` is rounded down to its page start before the query.
//   Without the rounding every non-page-aligned address was reported as
//   unreadable.
//
// PERFORMANCE WARNING (Phase 7-1.3, 2025-11-08):
// This function is EXPENSIVE (~634 cycles via mincore syscall on Linux,
// measured by the Phase 7-1.2 micro-benchmark).
// DO NOT call this on every free() - use alignment check first to avoid overhead!
//
// Recommended Pattern (Hybrid Approach):
//   if (((uintptr_t)ptr & 0xFFF) == 0) {
//       // Page boundary (0.1% case) - do safety check
//       if (!hak_is_memory_readable(ptr)) { /* handle page boundary */ }
//   }
//   // Normal case (99.9%): ptr is safe to read (no mincore call!)
//
// Performance Impact (as reported in the design review):
//   - Without hybrid: 634 cycles on EVERY free
//   - With hybrid: 1-2 cycles effective (99.9% × 1 + 0.1% × 634)
//
// See: PHASE7_DESIGN_REVIEW.md, Section 1.1 for full analysis
static inline int hak_is_memory_readable(void* addr) {
#ifdef __linux__
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        page_size = 4096;  // sysconf failed; fall back to the common page size
    }

    // Round down to the start of the containing page (page size is a power of two).
    uintptr_t page_start = (uintptr_t)addr & ~((uintptr_t)page_size - 1);

    unsigned char vec;
    // mincore returns 0 if page is mapped, -1 (ENOMEM) if not
    return mincore((void*)page_start, 1, &vec) == 0;
#else
    // Non-Linux: assume accessible (conservative fallback)
    // TODO: Add platform-specific checks for BSD, macOS, Windows
    (void)addr;
    return 1;
#endif
}

// ===========================================================================
// Header Helpers (with NULL safety)
// ===========================================================================

// Translate a user pointer back to the start of its raw allocation.
// Args: user_ptr - pointer previously handed to the user (may be NULL)
// Returns: user_ptr - HEADER_SIZE (where the AllocHeader begins), or NULL if
//          user_ptr is NULL
static inline void* hak_header_get_raw(void* user_ptr) {
    return user_ptr ? (void*)((char*)user_ptr - HEADER_SIZE) : NULL;
}

// Locate the AllocHeader that precedes a user pointer.
// Args: user_ptr - pointer previously handed to the user (may be NULL)
// Returns: Pointer to the AllocHeader, or NULL if user_ptr is NULL
// Note: performs no validation; pair with hak_header_validate().
static inline AllocHeader* hak_header_from_user(void* user_ptr) {
    // hak_header_get_raw() maps NULL to NULL, so no separate guard is needed.
    void* raw = hak_header_get_raw(user_ptr);
    return (AllocHeader*)raw;
}

// Check that a header carries the HAKMEM magic number.
// Args: hdr - header to inspect (may be NULL)
// Returns: 1 if hdr is non-NULL and hdr->magic == HAKMEM_MAGIC, otherwise 0
// Note: dereferences hdr, so the memory must be readable.
static inline int hak_header_validate(AllocHeader* hdr) {
    return (hdr != NULL && hdr->magic == HAKMEM_MAGIC) ? 1 : 0;
}

// Record the allocation call-site in a block's header (used as a cache key).
// Args: user_ptr - user pointer of the block (NULL is ignored)
//       site_id  - call-site identifier to store in alloc_site
static inline void hak_header_set_site(void* user_ptr, uintptr_t site_id) {
    AllocHeader* header = hak_header_from_user(user_ptr);
    if (header == NULL) {
        return;
    }
    header->alloc_site = site_id;
}

// Record the size class in a block's header (for BigCache).
// Args: user_ptr    - user pointer of the block (NULL is ignored)
//       class_bytes - size class in bytes to store in class_bytes
static inline void hak_header_set_class(void* user_ptr, size_t class_bytes) {
    AllocHeader* header = hak_header_from_user(user_ptr);
    if (header == NULL) {
        return;
    }
    header->class_bytes = class_bytes;
}

// ===========================================================================
// Free Strategies (static inline = zero overhead)
// ===========================================================================

// Release a block obtained through the malloc strategy.
// Args: raw - pointer to the raw allocation (start of the header); NULL is a no-op
//
// NOTE(review): the allocation side calls __libc_malloc directly while this
// calls plain free(); confirm free() resolves to the libc implementation when
// the library is loaded via LD_PRELOAD.
static inline void hak_free_malloc_impl(void* raw) {
    if (raw != NULL) {
        free(raw);
    }
}

// Release a block obtained through the mmap strategy.
// Args: raw  - pointer to the raw allocation (start of the header); NULL is a no-op
//       size - aligned size of the mapping (from header->size)
//
// Linux: unmaps the region with munmap() (result ignored).
// Other platforms: the block came from the malloc fallback, so free() it.
static inline void hak_free_mmap_impl(void* raw, size_t size) {
    if (raw != NULL) {
#ifdef __linux__
        munmap(raw, size);
#else
        free(raw);  // Fallback on non-Linux
#endif
    }
}

// Apply the configured free policy to a block (Phase 6.4 P1).
// Args: raw     - pointer to raw allocation (including header)
//       size    - allocated size
//       thermal - thermal classification (HOT/WARM/COLD)
// Returns: 1 if handled (no further action needed),
//          0 if the caller should continue (batch/direct free)
//
// Decision table (policy read from g_hakem_config.free_policy):
// - raw == NULL                      → 1 (nothing to do)
// - FREE_POLICY_KEEP                 → 1 (mapping kept, no madvise)
// - FREE_POLICY_ADAPTIVE + HOT       → 1 (kept, expected to be reused soon)
// - FREE_POLICY_ADAPTIVE + WARM      → 1 (MADV_FREE on Linux: the mapping
//                                         stays, only physical pages are returned)
// - FREE_POLICY_ADAPTIVE + COLD      → 0 (caller uses the batch path)
// - any other policy (BATCH default) → 0
static inline int hak_free_with_thermal_policy(void* raw, size_t size, FreeThermal thermal) {
    if (raw == NULL) {
        return 1;  // NULL is always "handled" (no-op)
    }

    const FreePolicy mode = g_hakem_config.free_policy;

    if (mode == FREE_POLICY_KEEP) {
        // KEEP: do nothing (keep the virtual address range, no madvise)
        return 1;
    }

    if (mode != FREE_POLICY_ADAPTIVE) {
        // FREE_POLICY_BATCH (default): caller handles
        return 0;
    }

    // ADAPTIVE: decide by thermal class
    if (thermal == FREE_THERMAL_HOT) {
        // HOT (< 1MB): do nothing (will be reused soon)
        return 1;
    }

    if (thermal == FREE_THERMAL_WARM) {
        // WARM (1-2MB): MADV_FREE (no munmap; return physical pages only)
#ifdef __linux__
        madvise(raw, size, MADV_FREE);
#endif
        return 1;
    }

    // COLD (>= 2MB), or an unrecognized class: not handled, caller should use batch
    return 0;
}

#endif // HAKMEM_INTERNAL_H