2025-11-17 05:29:08 +09:00
|
|
|
|
// malloc_tiny_fast.h - Phase 26: Front Gate Unification (Tiny Fast Path)
|
|
|
|
|
|
//
|
|
|
|
|
|
// Goal: Eliminate 3-layer overhead (malloc → hak_alloc_at → wrapper → tiny_alloc_fast)
|
|
|
|
|
|
// Target: +10-15% performance (11.35M → 12.5-13.5M ops/s)
|
|
|
|
|
|
//
|
|
|
|
|
|
// Design (ChatGPT analysis):
|
|
|
|
|
|
// - Replace: malloc → hak_alloc_at (236 lines) → wrapper (diagnostics) → tiny_alloc_fast
|
|
|
|
|
|
// - With: malloc → malloc_tiny_fast (single-layer, direct to Unified Cache)
|
|
|
|
|
|
// - Preserves: Safety checks (lock depth, initializing, LD_SAFE, jemalloc block)
|
|
|
|
|
|
// - Leverages: Phase 23 Unified Cache (tcache-style, 2-3 cache misses)
|
|
|
|
|
|
//
|
|
|
|
|
|
// Performance:
|
|
|
|
|
|
// - Current overhead: malloc(8.97%) + routing + wrapper(3.63%) + tiny(5.37%) = 17.97%
|
|
|
|
|
|
// - BenchFast ceiling: 8-10 instructions (~1-2% overhead)
|
|
|
|
|
|
// - Gap: ~16%
|
|
|
|
|
|
// - Target: Close half the gap (+10-15% improvement)
|
|
|
|
|
|
//
|
|
|
|
|
|
// ENV Variables:
|
|
|
|
|
|
// HAKMEM_FRONT_GATE_UNIFIED=0 # Disable Front Gate Unification (default: ON; only a value starting with '0' turns it off)
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef HAK_FRONT_MALLOC_TINY_FAST_H
|
|
|
|
|
|
#define HAK_FRONT_MALLOC_TINY_FAST_H
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
#include "../hakmem_build_flags.h"
|
|
|
|
|
|
#include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES
|
|
|
|
|
|
#include "tiny_unified_cache.h" // For unified_cache_pop_or_refill
|
|
|
|
|
|
#include "../tiny_region_id.h" // For tiny_region_id_write_header
|
|
|
|
|
|
#include "../hakmem_tiny.h" // For hak_tiny_size_to_class
|
|
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
// ENV Control (cached, lazy init)
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
// Reports whether the unified front gate is enabled.
//
// The HAKMEM_FRONT_GATE_UNIFIED environment variable is consulted on the
// first call only; the answer is then kept in a function-local static and
// returned directly by every later call.
//
// The gate is ON by default. It is switched off only when the variable is
// set and its value begins with the character '0'.
//
// Returns: 1 when enabled, 0 when disabled.
static inline int front_gate_unified_enabled(void) {
    static int cached_state = -1;  // -1 means the environment was not read yet

    // Common case: the decision was already made on an earlier call.
    if (__builtin_expect(cached_state != -1, 1)) {
        return cached_state;
    }

    const char* env_value = getenv("HAKMEM_FRONT_GATE_UNIFIED");
    const int turned_off = (env_value != NULL) && (env_value[0] == '0');
    cached_state = turned_off ? 0 : 1;

#if !HAKMEM_BUILD_RELEASE
    // Non-release builds report once on stderr when the gate ends up enabled.
    if (cached_state != 0) {
        fprintf(stderr, "[FrontGate-INIT] front_gate_unified_enabled() = %d\n", cached_state);
        fflush(stderr);
    }
#endif

    return cached_state;
}
|
|
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
// Phase 26-A: malloc_tiny_fast() - Ultra-thin Tiny allocation
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
// Single-layer Tiny allocation (bypasses hak_alloc_at + wrapper + diagnostics)
|
|
|
|
|
|
// Preconditions:
|
|
|
|
|
|
// - Called AFTER malloc() safety checks (lock depth, initializing, LD_SAFE)
|
|
|
|
|
|
// - size <= tiny_get_max_size() (caller verified)
|
|
|
|
|
|
// Returns:
|
|
|
|
|
|
// - USER pointer on success
|
|
|
|
|
|
// - NULL on Unified Cache miss (caller falls back to normal path)
|
|
|
|
|
|
__attribute__((always_inline))
|
|
|
|
|
|
static inline void* malloc_tiny_fast(size_t size) {
|
|
|
|
|
|
// 1. size → class_idx (inline table lookup, 1-2 instructions)
|
|
|
|
|
|
int class_idx = hak_tiny_size_to_class(size);
|
|
|
|
|
|
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
|
|
|
|
|
return NULL; // Out of range (should not happen if caller checked tiny_get_max_size())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 2. Phase 23: Unified Cache pop-or-refill (tcache-style, 2-3 cache misses)
|
|
|
|
|
|
// This internally handles:
|
|
|
|
|
|
// - Cache hit: direct pop (fast path)
|
|
|
|
|
|
// - Cache miss: batch refill from SuperSlab (slow path)
|
|
|
|
|
|
void* base = unified_cache_pop_or_refill(class_idx);
|
|
|
|
|
|
if (__builtin_expect(base == NULL, 0)) {
|
|
|
|
|
|
// Unified Cache disabled OR refill failed
|
|
|
|
|
|
// Fall back to normal path (caller handles via hak_alloc_at)
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 3. Write header + return USER pointer (2-3 instructions)
|
|
|
|
|
|
#ifdef HAKMEM_TINY_HEADER_CLASSIDX
|
|
|
|
|
|
tiny_region_id_write_header(base, class_idx); // Write 1-byte header (BASE first!)
|
|
|
|
|
|
return (void*)((char*)base + 1); // Return USER pointer
|
|
|
|
|
|
#else
|
|
|
|
|
|
return base; // No header mode - return BASE directly
|
|
|
|
|
|
#endif
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
// Phase 26-B: free_tiny_fast() - Ultra-thin Tiny deallocation
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
// free_tiny_fast(): single-layer tiny deallocation that pushes a block back to
// the Unified Cache instead of passing through hak_free_at, the wrapper and
// its diagnostics.
//
// Callers are expected to use this only while Front Gate Unified is enabled.
//
// Returns:
// - 1 when the block was pushed to the Unified Cache
// - 0 when the fast path declines the pointer (NULL, pointer at the start of a
//   4 KiB page, header magic mismatch, class index out of range, push refused,
//   or header mode compiled out); the caller then falls back to the normal
//   free path
__attribute__((always_inline))
static inline int free_tiny_fast(void* ptr) {
    if (__builtin_expect(ptr == NULL, 0)) {
        return 0;
    }

#ifdef HAKMEM_TINY_HEADER_CLASSIDX
    // Page-boundary guard: when ptr is the first byte of a 4 KiB page
    // (offset 0), ptr - 1 may lie in a different page or in unmapped memory.
    // Do not read the header in that case; let the normal free path handle it.
    const uintptr_t page_offset = (uintptr_t)ptr & 0xFFFu;
    if (__builtin_expect(page_offset == 0, 0)) {
        return 0;
    }

    // The header byte sits directly in front of the USER pointer, i.e. at BASE.
    uint8_t* base_byte = (uint8_t*)ptr - 1;
    const uint8_t hdr = *base_byte;

    // Mandatory magic validation on the upper four bits.
    // Per the original note, release builds of tiny_region_id_read_header()
    // skip the magic check, so the tiny header tag (HEADER_MAGIC, noted as
    // 0xA0) is verified here instead.
    const uint8_t tag = hdr & 0xF0u;
    if (__builtin_expect(tag != HEADER_MAGIC, 0)) {
        // Not a tiny header: Mid/Large or foreign pointer -> normal free path.
        return 0;
    }

    // The class index is taken from the bits selected by HEADER_CLASS_MASK
    // (the lower four bits, per the original note).
    const int cls = (int)(hdr & HEADER_CLASS_MASK);
    const int cls_in_range = (cls >= 0) && (cls < TINY_NUM_CLASSES);
    if (__builtin_expect(!cls_in_range, 0)) {
        return 0;
    }

    // Hand BASE (ptr - 1) back to the Unified Cache. A zero result means the
    // push was refused (cache full, per the original note), so the caller
    // must use the normal free path.
    const int accepted = unified_cache_push(cls, (void*)base_byte);
    return __builtin_expect(accepted != 0, 1) ? 1 : 0;
#else
    // No header mode: always defer to the normal free path.
    return 0;
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif // HAK_FRONT_MALLOC_TINY_FAST_H
|