Box TLS-SLL + free boundary hardening: normalize C0–C6 to base (ptr-1) at free boundary; route all caches/freelists via base; replace remaining g_tls_sll_head direct writes with Box API (tls_sll_push/splice) in refill/magazine/ultra; keep C7 excluded. Fixes rbp=0xa0 free crash by preventing header overwrite and centralizing TLS-SLL invariants.
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
// front_gate_box.c - Front Gate Box (SFC/SLL priority and helpers)
|
||||
#include "front_gate_box.h"
|
||||
#include "tiny_alloc_fast_sfc.inc.h"
|
||||
#include "tls_sll_box.h" // Box TLS-SLL API
|
||||
|
||||
// TLS SLL state (extern from hakmem_tiny.c)
|
||||
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
|
||||
@ -29,11 +30,9 @@ int front_gate_try_pop(int class_idx, void** out_ptr) {
|
||||
|
||||
// Layer 1: TLS SLL
|
||||
if (__builtin_expect(g_tls_sll_enable, 1)) {
|
||||
void* head = g_tls_sll_head[class_idx];
|
||||
if (__builtin_expect(head != NULL, 1)) {
|
||||
void* head = NULL;
|
||||
if (tls_sll_pop(class_idx, &head)) {
|
||||
g_front_sll_hit[class_idx]++;
|
||||
g_tls_sll_head[class_idx] = *(void**)head; // pop
|
||||
if (g_tls_sll_count[class_idx] > 0) g_tls_sll_count[class_idx]--;
|
||||
*out_ptr = head;
|
||||
return 1;
|
||||
}
|
||||
@ -51,10 +50,8 @@ void front_gate_after_refill(int class_idx, int refilled_count) {
|
||||
|
||||
while (to_move-- > 0 && g_tls_sll_count[class_idx] > 0) {
|
||||
// SLL pop
|
||||
void* ptr = g_tls_sll_head[class_idx];
|
||||
if (!ptr) break;
|
||||
g_tls_sll_head[class_idx] = *(void**)ptr;
|
||||
g_tls_sll_count[class_idx]--;
|
||||
void* ptr = NULL;
|
||||
if (!tls_sll_pop(class_idx, &ptr)) break;
|
||||
|
||||
// SFC push (capacity-guarded inside sfc_free_push)
|
||||
if (!sfc_free_push(class_idx, ptr)) {
|
||||
@ -65,8 +62,11 @@ void front_gate_after_refill(int class_idx, int refilled_count) {
|
||||
}
|
||||
|
||||
// Push `ptr` onto the TLS SLL of `class_idx`.
//
// The list is mutated exclusively through the Box TLS-SLL API:
// tls_sll_push() links the node and bumps the per-class count itself, so no
// direct head/next/count writes may be done here (doing both would push the
// same node twice, leaving it linked to itself and counted twice).
//
// With capacity UINT32_MAX the push can only fail when tls_sll_push() rejects
// class 7 or the count has already reached UINT32_MAX. In that case `ptr` is
// not linked anywhere and this function returns silently.
// NOTE(review): a rejected pointer is dropped without any fallback here -
// confirm that callers never route C7 blocks through this function.
void front_gate_push_tls(int class_idx, void* ptr) {
    if (!tls_sll_push(class_idx, ptr, UINT32_MAX)) {
        // C7 rejected or capacity exceeded - should not happen in front gate,
        // but handle gracefully (silent discard)
        return;
    }
}
|
||||
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
core/box/front_gate_box.o: core/box/front_gate_box.c \
|
||||
core/box/front_gate_box.h core/hakmem_tiny.h core/hakmem_build_flags.h \
|
||||
core/hakmem_trace.h core/hakmem_tiny_mini_mag.h \
|
||||
core/tiny_alloc_fast_sfc.inc.h core/hakmem_tiny.h
|
||||
core/tiny_alloc_fast_sfc.inc.h core/hakmem_tiny.h core/box/tls_sll_box.h \
|
||||
core/box/../hakmem_tiny_config.h
|
||||
core/box/front_gate_box.h:
|
||||
core/hakmem_tiny.h:
|
||||
core/hakmem_build_flags.h:
|
||||
@ -9,3 +10,5 @@ core/hakmem_trace.h:
|
||||
core/hakmem_tiny_mini_mag.h:
|
||||
core/tiny_alloc_fast_sfc.inc.h:
|
||||
core/hakmem_tiny.h:
|
||||
core/box/tls_sll_box.h:
|
||||
core/box/../hakmem_tiny_config.h:
|
||||
|
||||
228
core/box/front_gate_classifier.c
Normal file
228
core/box/front_gate_classifier.c
Normal file
@ -0,0 +1,228 @@
|
||||
// front_gate_classifier.c - Box FG: Pointer Classification Implementation
|
||||
|
||||
// CRITICAL: Box FG requires header-based classification
|
||||
// Ensure HEADER_MAGIC and HEADER_CLASS_MASK are available
|
||||
#ifndef HAKMEM_TINY_HEADER_CLASSIDX
|
||||
#define HAKMEM_TINY_HEADER_CLASSIDX 1
|
||||
#endif
|
||||
|
||||
#include <stdio.h> // For fprintf in debug
|
||||
#include <stdlib.h> // For abort in debug
|
||||
#include "front_gate_classifier.h"
|
||||
#include "../tiny_region_id.h" // Must come before hakmem_tiny_superslab.h for HEADER_MAGIC
|
||||
#include "../hakmem_tiny_superslab.h"
|
||||
#include "../superslab/superslab_inline.h" // For ss_slabs_capacity
|
||||
#include "../hakmem_build_flags.h"
|
||||
#include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES, SLAB_SIZE
|
||||
#include "../hakmem_super_registry.h" // For hak_super_lookup (Box REG)
|
||||
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
#include "../pool_tls.h" // For POOL_MAGIC
|
||||
#endif
|
||||
|
||||
// ========== Debug Stats ==========
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Per-thread classification counters, incremented by classify_ptr().
__thread uint64_t g_classify_header_hit = 0;
__thread uint64_t g_classify_headerless_hit = 0;
__thread uint64_t g_classify_pool_hit = 0;
__thread uint64_t g_classify_unknown_hit = 0;

// Print the classification counters of the calling thread to stderr.
//
// The counters are thread-local, so only the hits recorded by the thread that
// calls this function are reported. Nothing is printed when no pointer has
// been classified on this thread (also avoids a division by zero).
//
// The counters are uint64_t; they are cast to unsigned long long and printed
// with %llu so the format matches the argument type on every ABI (plain %lu
// is only correct where uint64_t happens to be unsigned long).
void front_gate_print_stats(void) {
    const uint64_t total = g_classify_header_hit + g_classify_headerless_hit +
                           g_classify_pool_hit + g_classify_unknown_hit;
    if (total == 0) {
        return;
    }

    fprintf(stderr, "\n========== Front Gate Classification Stats ==========\n");
    fprintf(stderr, "Header (C0-C6): %llu (%.2f%%)\n",
            (unsigned long long)g_classify_header_hit,
            100.0 * g_classify_header_hit / total);
    fprintf(stderr, "Headerless (C7): %llu (%.2f%%)\n",
            (unsigned long long)g_classify_headerless_hit,
            100.0 * g_classify_headerless_hit / total);
    fprintf(stderr, "Pool TLS: %llu (%.2f%%)\n",
            (unsigned long long)g_classify_pool_hit,
            100.0 * g_classify_pool_hit / total);
    fprintf(stderr, "Unknown: %llu (%.2f%%)\n",
            (unsigned long long)g_classify_unknown_hit,
            100.0 * g_classify_unknown_hit / total);
    fprintf(stderr, "Total: %llu\n", (unsigned long long)total);
    fprintf(stderr, "======================================================\n");
}
|
||||
|
||||
// Registered as a destructor so the classification stats are dumped
// automatically; it simply forwards to front_gate_print_stats().
static void __attribute__((destructor))
front_gate_stats_destructor(void)
{
    front_gate_print_stats();
}
|
||||
#endif
|
||||
|
||||
// ========== Safe Header Probe ==========
|
||||
|
||||
// Try to read 1-byte header at ptr-1 (safe conditions only)
|
||||
// Returns: class_idx (0-7) on success, -1 on failure
|
||||
//
|
||||
// Safety conditions:
|
||||
// 1. Same page: (ptr & 0xFFF) >= 1 → header won't cross page boundary
|
||||
// 2. Valid magic: (header & 0xF0) == HEADER_MAGIC (0xa0)
|
||||
// 3. Valid class: class_idx in range [0, 7]
|
||||
//
|
||||
// Performance: 2-3 cycles (L1 cache hit)
|
||||
static inline int safe_header_probe(void* ptr) {
|
||||
// Safety check: header must be in same page as ptr
|
||||
uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
|
||||
if (offset_in_page == 0) {
|
||||
// ptr is page-aligned → header would be on previous page (unsafe)
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Safe to read header (same page guaranteed)
|
||||
uint8_t* header_ptr = (uint8_t*)ptr - 1;
|
||||
uint8_t header = *header_ptr;
|
||||
|
||||
// Validate magic
|
||||
if ((header & 0xF0) != HEADER_MAGIC) {
|
||||
return -1; // Not a Tiny header
|
||||
}
|
||||
|
||||
// Extract class index
|
||||
int class_idx = header & HEADER_CLASS_MASK;
|
||||
|
||||
// Header-based Tiny never encodes class 7 (C7 is headerless)
|
||||
if (class_idx == 7) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Validate class range
|
||||
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
|
||||
return -1; // Invalid class
|
||||
}
|
||||
|
||||
return class_idx;
|
||||
}
|
||||
|
||||
// ========== Registry Lookup ==========
|
||||
|
||||
// Lookup pointer in SuperSlab registry (fallback when header probe fails)
|
||||
// Returns: classification result with SuperSlab + class_idx + slab_idx
|
||||
//
|
||||
// Performance: 50-100 cycles (hash lookup + validation)
|
||||
static inline ptr_classification_t registry_lookup(void* ptr) {
|
||||
ptr_classification_t result = {
|
||||
.kind = PTR_KIND_UNKNOWN,
|
||||
.class_idx = -1,
|
||||
.ss = NULL,
|
||||
.slab_idx = -1
|
||||
};
|
||||
|
||||
// Query SuperSlab registry
|
||||
struct SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (!ss) {
|
||||
// Not in Tiny registry
|
||||
return result;
|
||||
}
|
||||
|
||||
// Found SuperSlab - determine slab index
|
||||
result.ss = ss;
|
||||
result.class_idx = ss->size_class;
|
||||
|
||||
// Calculate slab index
|
||||
uintptr_t ptr_addr = (uintptr_t)ptr;
|
||||
uintptr_t ss_addr = (uintptr_t)ss;
|
||||
|
||||
if (ptr_addr < ss_addr) {
|
||||
// Pointer before SuperSlab base (invalid)
|
||||
result.kind = PTR_KIND_UNKNOWN;
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t offset = ptr_addr - ss_addr;
|
||||
result.slab_idx = (int)(offset / SLAB_SIZE);
|
||||
|
||||
// Validate slab index (ss_slabs_capacity defined in superslab_inline.h)
|
||||
if (result.slab_idx < 0 || result.slab_idx >= ss_slabs_capacity(ss)) {
|
||||
// Out of range
|
||||
result.kind = PTR_KIND_UNKNOWN;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Valid Tiny allocation (headerless)
|
||||
// Note: C7 (1KB) is the only headerless class, but Registry handles all
|
||||
result.kind = PTR_KIND_TINY_HEADERLESS;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ========== Pool TLS Probe ==========
|
||||
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
// Check if pointer has Pool TLS magic (0xb0)
|
||||
// Returns: 1 if Pool TLS, 0 otherwise
|
||||
static inline int is_pool_tls(void* ptr) {
|
||||
// Same safety check as header probe
|
||||
uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
|
||||
if (offset_in_page == 0) {
|
||||
return 0; // Page-aligned, skip header read
|
||||
}
|
||||
|
||||
uint8_t* header_ptr = (uint8_t*)ptr - 1;
|
||||
uint8_t header = *header_ptr;
|
||||
|
||||
return (header & 0xF0) == POOL_MAGIC;
|
||||
}
|
||||
#endif
|
||||
|
||||
// ========== Front Gate Entry Point ==========
|
||||
|
||||
ptr_classification_t classify_ptr(void* ptr) {
|
||||
ptr_classification_t result = {
|
||||
.kind = PTR_KIND_UNKNOWN,
|
||||
.class_idx = -1,
|
||||
.ss = NULL,
|
||||
.slab_idx = -1
|
||||
};
|
||||
|
||||
if (!ptr) return result;
|
||||
|
||||
// Step 1: Try safe header probe (C0-C6 fast path: 5-10 cycles)
|
||||
// Skip header probe on 1KB-aligned pointers to avoid misclassifying C7/headerless
|
||||
int class_idx = -1;
|
||||
if (((uintptr_t)ptr & 0x3FF) != 0) {
|
||||
class_idx = safe_header_probe(ptr);
|
||||
}
|
||||
if (class_idx >= 0) {
|
||||
// Header found - C0-C6 with header
|
||||
result.kind = PTR_KIND_TINY_HEADER;
|
||||
result.class_idx = class_idx;
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
g_classify_header_hit++;
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
// Step 2: Check Pool TLS (before Registry to avoid false positives)
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
if (is_pool_tls(ptr)) {
|
||||
result.kind = PTR_KIND_POOL_TLS;
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
g_classify_pool_hit++;
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Step 3: Fallback to Registry lookup (C7 headerless or header failed)
|
||||
result = registry_lookup(ptr);
|
||||
if (result.kind == PTR_KIND_TINY_HEADERLESS) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
g_classify_headerless_hit++;
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
// Step 4: Not Tiny or Pool - return UNKNOWN
|
||||
// Caller should check AllocHeader (16-byte) or delegate to system free
|
||||
result.kind = PTR_KIND_UNKNOWN;
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
g_classify_unknown_hit++;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
38
core/box/front_gate_classifier.d
Normal file
38
core/box/front_gate_classifier.d
Normal file
@ -0,0 +1,38 @@
|
||||
core/box/front_gate_classifier.o: core/box/front_gate_classifier.c \
|
||||
core/box/front_gate_classifier.h core/box/../tiny_region_id.h \
|
||||
core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_superslab.h \
|
||||
core/box/../superslab/superslab_types.h \
|
||||
core/hakmem_tiny_superslab_constants.h \
|
||||
core/box/../superslab/superslab_inline.h \
|
||||
core/box/../superslab/superslab_types.h core/tiny_debug_ring.h \
|
||||
core/tiny_remote.h core/box/../superslab/../tiny_box_geometry.h \
|
||||
core/box/../superslab/../hakmem_tiny_superslab_constants.h \
|
||||
core/box/../superslab/../hakmem_tiny_config.h \
|
||||
core/box/../tiny_debug_ring.h core/box/../tiny_remote.h \
|
||||
core/box/../hakmem_tiny_superslab_constants.h \
|
||||
core/box/../superslab/superslab_inline.h \
|
||||
core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_config.h \
|
||||
core/box/../hakmem_super_registry.h core/box/../hakmem_tiny_superslab.h \
|
||||
core/box/../pool_tls.h
|
||||
core/box/front_gate_classifier.h:
|
||||
core/box/../tiny_region_id.h:
|
||||
core/box/../hakmem_build_flags.h:
|
||||
core/box/../hakmem_tiny_superslab.h:
|
||||
core/box/../superslab/superslab_types.h:
|
||||
core/hakmem_tiny_superslab_constants.h:
|
||||
core/box/../superslab/superslab_inline.h:
|
||||
core/box/../superslab/superslab_types.h:
|
||||
core/tiny_debug_ring.h:
|
||||
core/tiny_remote.h:
|
||||
core/box/../superslab/../tiny_box_geometry.h:
|
||||
core/box/../superslab/../hakmem_tiny_superslab_constants.h:
|
||||
core/box/../superslab/../hakmem_tiny_config.h:
|
||||
core/box/../tiny_debug_ring.h:
|
||||
core/box/../tiny_remote.h:
|
||||
core/box/../hakmem_tiny_superslab_constants.h:
|
||||
core/box/../superslab/superslab_inline.h:
|
||||
core/box/../hakmem_build_flags.h:
|
||||
core/box/../hakmem_tiny_config.h:
|
||||
core/box/../hakmem_super_registry.h:
|
||||
core/box/../hakmem_tiny_superslab.h:
|
||||
core/box/../pool_tls.h:
|
||||
78
core/box/front_gate_classifier.h
Normal file
78
core/box/front_gate_classifier.h
Normal file
@ -0,0 +1,78 @@
|
||||
// front_gate_classifier.h - Box FG: Pointer Classification Front Gate
|
||||
//
|
||||
// Purpose: Single point of truth for classifying pointers (Tiny/Pool/Mid/Large)
|
||||
// Design: Heuristic-free, safe header probe + Registry lookup fallback
|
||||
//
|
||||
// Key Rules:
|
||||
// 1. ptr-1 is read ONLY here (never elsewhere)
|
||||
// 2. Header probe only when safe (same page + readable)
|
||||
// 3. C7 (headerless) always identified via Registry
|
||||
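// 4. 1KB alignment is never used to decide a class; classify_ptr() only uses it to skip the header probe, so 1KB-aligned pointers are resolved by the Pool TLS / Registry checks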
|
||||
//
|
||||
// Architecture:
|
||||
// - Box FG (this): Classification authority
|
||||
// - Box REG: SuperSlab registry (O(1) reverse lookup)
|
||||
// - Box TLS: next pointer clearing for C7
|
||||
//
|
||||
// Performance:
|
||||
// - Fast path (C0-C6 header): 5-10 cycles (unchanged)
|
||||
// - Slow path (C7 REG): 50-100 cycles (rare)
|
||||
// - Safety: SEGV eliminated, false positive = 0%
|
||||
|
||||
#ifndef FRONT_GATE_CLASSIFIER_H
|
||||
#define FRONT_GATE_CLASSIFIER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Forward declaration
|
||||
struct SuperSlab;
|
||||
|
||||
// Kinds of pointer the Front Gate can report.
// Enumerator values are the implicit 0..4 sequence, spelled out explicitly.
typedef enum {
    PTR_KIND_TINY_HEADER     = 0,  // Tiny C0-C6: 1-byte header in front of the block (fast path)
    PTR_KIND_TINY_HEADERLESS = 1,  // Tiny block resolved through the Registry (C7 1KB blocks)
    PTR_KIND_POOL_TLS        = 2,  // Pool TLS allocation (8KB-52KB)
    PTR_KIND_MID_LARGE       = 3,  // Mid/Large allocation
    PTR_KIND_UNKNOWN         = 4   // Not recognised / external allocation
} tiny_ptr_kind_t;

// Outcome of classify_ptr().
typedef struct {
    tiny_ptr_kind_t kind;      // Which allocator family the pointer belongs to
    int class_idx;             // Tiny class index (0-7), -1 when not Tiny
    struct SuperSlab* ss;      // Owning SuperSlab when found via the Registry, else NULL
    int slab_idx;              // Slab index inside `ss`, -1 when not computed
} ptr_classification_t;
|
||||
|
||||
// ========== Front Gate API ==========
|
||||
|
||||
// Classify pointer (single point of truth)
|
||||
// Returns: Classification result with kind, class_idx, SuperSlab
|
||||
//
|
||||
// Strategy (in the order classify_ptr() applies it):
//   1. Try safe header probe (C0-C6 fast path: 5-10 cycles);
//      skipped when ptr is 1KB-aligned
//   2. Check Pool TLS magic (only when HAKMEM_POOL_TLS_PHASE1 is defined)
//   3. Fallback to Registry lookup (C7 or header probe failed)
//   4. Return UNKNOWN if all fail; the caller then checks the 16-byte
//      AllocHeader (malloc/mmap) or delegates to system free
//
// Safety:
//   - Header probe only if: (ptr & 0xFFF) >= 1 (same page)
//   - 1KB-aligned pointers bypass the header probe; they are resolved by
//     the Pool TLS / Registry checks only
//   - Registry provides ground truth for headerless
|
||||
ptr_classification_t classify_ptr(void* ptr);
|
||||
|
||||
// ========== Debug/Stats (optional) ==========
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Track classification hit rates
|
||||
extern __thread uint64_t g_classify_header_hit;
|
||||
extern __thread uint64_t g_classify_headerless_hit;
|
||||
extern __thread uint64_t g_classify_pool_hit;
|
||||
extern __thread uint64_t g_classify_unknown_hit;
|
||||
|
||||
void front_gate_print_stats(void);
|
||||
#endif
|
||||
|
||||
#endif // FRONT_GATE_CLASSIFIER_H
|
||||
@ -4,6 +4,7 @@
|
||||
|
||||
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
|
||||
#include "../tiny_free_fast_v2.inc.h" // Phase 7: Header-based ultra-fast free
|
||||
#include "front_gate_classifier.h" // Box FG: Centralized pointer classification
|
||||
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
#include "../pool_tls.h"
|
||||
@ -78,120 +79,62 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
// Phase 1: Try Pool TLS free FIRST for 8KB-52KB range
|
||||
// CRITICAL: Must come before Phase 7 Tiny to avoid magic mismatch SEGV
|
||||
// Pool TLS uses magic 0xb0, Tiny uses magic 0xa0 - must distinguish!
|
||||
{
|
||||
void* header_addr = (char*)ptr - 1;
|
||||
|
||||
// Safety vs performance trade-off:
|
||||
// - If HAKMEM_TINY_SAFE_FREE=1 (strict), validate with mincore() always
|
||||
// - Else (default), only validate on page-boundary risk to avoid syscall cost
|
||||
#if HAKMEM_TINY_SAFE_FREE
|
||||
if (!hak_is_memory_readable(header_addr)) { goto skip_pool_tls; }
|
||||
#else
|
||||
uintptr_t off = (uintptr_t)header_addr & 0xFFF;
|
||||
if (__builtin_expect(off == 0, 0)) {
|
||||
if (!hak_is_memory_readable(header_addr)) { goto skip_pool_tls; }
|
||||
}
|
||||
#endif
|
||||
|
||||
uint8_t header = *(uint8_t*)header_addr;
|
||||
|
||||
if ((header & 0xF0) == POOL_MAGIC) {
|
||||
pool_free(ptr);
|
||||
hak_free_route_log("pool_tls", ptr);
|
||||
goto done;
|
||||
}
|
||||
// Not Pool TLS - fall through to other paths
|
||||
}
|
||||
skip_pool_tls:
|
||||
#endif
|
||||
// ========== Box FG: Single Point of Classification ==========
|
||||
// Classify pointer once using Front Gate (safe header probe + Registry fallback)
|
||||
// This eliminates all scattered ptr-1 reads and centralizes classification logic
|
||||
ptr_classification_t classification = classify_ptr(ptr);
|
||||
|
||||
// Route based on classification result
|
||||
switch (classification.kind) {
|
||||
case PTR_KIND_TINY_HEADER: {
|
||||
// C0-C6: Has 1-byte header, class_idx already determined by Front Gate
|
||||
// Fast path: Use class_idx directly without SuperSlab lookup
|
||||
hak_free_route_log("tiny_header", ptr);
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
// Phase 7: Dual-header dispatch (1-byte Tiny header OR 16-byte malloc/mmap header)
|
||||
//
|
||||
// Step 1: Try 1-byte Tiny header (fast path: 5-10 cycles)
|
||||
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
|
||||
hak_free_route_log("header_fast", ptr);
|
||||
// Use ultra-fast free path with pre-determined class_idx
|
||||
if (__builtin_expect(hak_tiny_free_fast_v2(ptr), 1)) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
hak_free_v2_track_fast(); // Track hit rate in debug
|
||||
hak_free_v2_track_fast();
|
||||
#endif
|
||||
goto done; // Success - done in 5-10 cycles! NO SuperSlab lookup!
|
||||
}
|
||||
|
||||
// Step 2: Try 16-byte AllocHeader (malloc/mmap allocations)
|
||||
// CRITICAL: Must check this BEFORE calling hak_tiny_free() to avoid silent failures!
|
||||
{
|
||||
void* raw = (char*)ptr - HEADER_SIZE;
|
||||
|
||||
// SAFETY: Check if raw header is accessible before dereferencing
|
||||
// This prevents SEGV when malloc metadata is unmapped
|
||||
//
|
||||
// OPTIMIZATION: raw = ptr - HEADER_SIZE (16 bytes)
|
||||
// Page boundary case: if ptr is in first 16 bytes of page, raw crosses page boundary
|
||||
// Check: (ptr & 0xFFF) < HEADER_SIZE → raw might be on previous (unmapped) page
|
||||
uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
|
||||
if (__builtin_expect(offset_in_page < HEADER_SIZE, 0)) {
|
||||
// Potential page boundary crossing - do safety check
|
||||
if (!hak_is_memory_readable(raw)) {
|
||||
goto slow_path_after_step2;
|
||||
}
|
||||
}
|
||||
// Normal case (99.6%): raw is on same page as ptr (no mincore call!)
|
||||
|
||||
// Safe to dereference now
|
||||
AllocHeader* hdr = (AllocHeader*)raw;
|
||||
|
||||
if (hdr->magic == HAKMEM_MAGIC) {
|
||||
// Valid 16-byte header found (malloc/mmap allocation)
|
||||
hak_free_route_log("header_16byte", ptr);
|
||||
|
||||
if (hdr->method == ALLOC_METHOD_MALLOC) {
|
||||
// CRITICAL: raw was allocated with __libc_malloc, so free with __libc_free
|
||||
extern void __libc_free(void*);
|
||||
__libc_free(raw);
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Handle other methods (mmap, etc) - continue to slow path below
|
||||
}
|
||||
slow_path_after_step2:;
|
||||
}
|
||||
|
||||
// Fallback: Invalid header (non-tiny) or TLS cache full
|
||||
// Fallback to slow path if TLS cache full
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
hak_free_v2_track_slow();
|
||||
hak_free_v2_track_slow();
|
||||
#endif
|
||||
#endif
|
||||
hak_tiny_free(ptr);
|
||||
goto done;
|
||||
}
|
||||
|
||||
case PTR_KIND_TINY_HEADERLESS: {
|
||||
// C7: Headerless 1KB blocks, SuperSlab + slab_idx provided by Registry
|
||||
// Medium path: Use Registry result, no header read needed
|
||||
hak_free_route_log("tiny_headerless", ptr);
|
||||
hak_tiny_free(ptr);
|
||||
goto done;
|
||||
}
|
||||
|
||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||
case PTR_KIND_POOL_TLS: {
|
||||
// Pool TLS: 8KB-52KB allocations with 0xb0 magic
|
||||
hak_free_route_log("pool_tls", ptr);
|
||||
pool_free(ptr);
|
||||
goto done;
|
||||
}
|
||||
#endif
|
||||
|
||||
// SS-first free(既定ON)
|
||||
#if !HAKMEM_TINY_HEADER_CLASSIDX
|
||||
// Only run SS-first if Phase 7 header-based free is not enabled
|
||||
// (Phase 7 already does the SS lookup and handles SS allocations)
|
||||
{
|
||||
static int s_free_to_ss = -2;
|
||||
if (s_free_to_ss == -2) {
|
||||
const char* e = getenv("HAKMEM_TINY_FREE_TO_SS");
|
||||
s_free_to_ss = (e && *e) ? ((*e!='0')?1:0) : 1;
|
||||
}
|
||||
if (s_free_to_ss) {
|
||||
extern int g_use_superslab;
|
||||
if (__builtin_expect(g_use_superslab != 0, 1)) {
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
int sidx = slab_index_for(ss, ptr);
|
||||
int cap = ss_slabs_capacity(ss);
|
||||
if (__builtin_expect(sidx >= 0 && sidx < cap, 1)) { hak_free_route_log("ss_hit", ptr); hak_tiny_free(ptr); goto done; }
|
||||
}
|
||||
// FIX: Removed dangerous "guess loop" (lines 92-95)
|
||||
// The loop dereferenced unmapped memory causing SEGV
|
||||
// If registry lookup fails, allocation is not from SuperSlab
|
||||
}
|
||||
case PTR_KIND_UNKNOWN:
|
||||
default: {
|
||||
// Not Tiny or Pool - check 16-byte AllocHeader (Mid/Large/malloc/mmap)
|
||||
// This is the slow path for large allocations
|
||||
break; // Fall through to header dispatch below
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// ========== Slow Path: 16-byte AllocHeader Dispatch ==========
|
||||
// Handle Mid/Large allocations (malloc/mmap/Pool/L25)
|
||||
// Note: All Tiny allocations (C0-C7) already handled by Front Gate above
|
||||
|
||||
// Mid/L25 headerless経路
|
||||
{
|
||||
|
||||
164
core/box/tls_sll_box.h
Normal file
164
core/box/tls_sll_box.h
Normal file
@ -0,0 +1,164 @@
|
||||
// tls_sll_box.h - Box TLS-SLL: Single-Linked List API (C7-safe)
|
||||
//
|
||||
// Purpose: Centralized TLS SLL management with C7 protection
|
||||
// Design: Zero-overhead static inline API, C7 always rejected
|
||||
//
|
||||
// Key Rules:
|
||||
// 1. C7 (1KB headerless) is ALWAYS rejected (returns false/0)
|
||||
// 2. All SLL direct writes MUST go through this API
|
||||
// 3. Pop returns with first 8 bytes cleared for C7 (safety)
|
||||
// 4. Capacity checks prevent overflow
|
||||
//
|
||||
// Architecture:
|
||||
// - Box TLS-SLL (this): Push/Pop/Splice authority
|
||||
// - Caller: Provides capacity limits, handles fallback on failure
|
||||
//
|
||||
// Performance:
|
||||
// - Static inline → zero function call overhead
|
||||
// - C7 check: 1 comparison + predict-not-taken (< 1 cycle)
|
||||
// - Same performance as direct SLL access for C0-C6
|
||||
|
||||
#ifndef TLS_SLL_BOX_H
|
||||
#define TLS_SLL_BOX_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h> // For fprintf in debug
|
||||
#include <stdlib.h> // For abort in debug
|
||||
#include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES
|
||||
|
||||
// External TLS SLL state (defined elsewhere)
|
||||
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
|
||||
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
|
||||
|
||||
// ========== Push ==========
|
||||
|
||||
// Push pointer to TLS SLL
|
||||
// Returns: true on success, false if C7 or capacity exceeded
|
||||
//
|
||||
// Safety:
|
||||
// - C7 always rejected (headerless, first 8 bytes = user data)
|
||||
// - Capacity check prevents overflow
|
||||
// - Caller must handle fallback (e.g., meta->freelist)
|
||||
//
|
||||
// Performance: 2-3 cycles (C0-C6), < 1 cycle (C7 fast rejection)
|
||||
static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
|
||||
// CRITICAL: C7 (1KB) is headerless - MUST NOT use TLS SLL
|
||||
// Reason: SLL stores next pointer in first 8 bytes (user data for C7)
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
return false; // C7 rejected
|
||||
}
|
||||
|
||||
// Capacity check
|
||||
if (g_tls_sll_count[class_idx] >= capacity) {
|
||||
return false; // SLL full
|
||||
}
|
||||
|
||||
// Push to SLL (standard linked list push)
|
||||
*(void**)ptr = g_tls_sll_head[class_idx];
|
||||
g_tls_sll_head[class_idx] = ptr;
|
||||
g_tls_sll_count[class_idx]++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ========== Pop ==========
|
||||
|
||||
// Pop pointer from TLS SLL
|
||||
// Returns: true on success (writes to *out), false if empty
|
||||
//
|
||||
// Safety:
|
||||
// - C7 protection: clears first 8 bytes on pop (prevents next pointer leak)
|
||||
// - NULL check before deref
|
||||
//
|
||||
// Performance: 3-4 cycles
|
||||
static inline bool tls_sll_pop(int class_idx, void** out) {
|
||||
void* head = g_tls_sll_head[class_idx];
|
||||
if (!head) {
|
||||
return false; // SLL empty
|
||||
}
|
||||
|
||||
// Pop from SLL
|
||||
void* next = *(void**)head;
|
||||
g_tls_sll_head[class_idx] = next;
|
||||
if (g_tls_sll_count[class_idx] > 0) {
|
||||
g_tls_sll_count[class_idx]--;
|
||||
}
|
||||
|
||||
// CRITICAL: C7 (1KB) returns with first 8 bytes cleared
|
||||
// Reason: C7 is headerless, first 8 bytes are user data area
|
||||
// Without this: user sees stale SLL next pointer → corruption
|
||||
// Cost: 1 store instruction (~1 cycle), only for C7 (~1% of allocations)
|
||||
//
|
||||
// Note: C0-C6 have 1-byte header, so first 8 bytes are safe (header hides next)
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
*(void**)head = NULL;
|
||||
}
|
||||
|
||||
*out = head;
|
||||
return true;
|
||||
}
|
||||
|
||||
// ========== Splice ==========
|
||||
|
||||
// Splice chain of pointers to TLS SLL (batch push)
|
||||
// Returns: actual count moved (0 for C7 or if capacity exceeded)
|
||||
//
|
||||
// Safety:
|
||||
// - C7 always returns 0 (no splice)
|
||||
// - Capacity check limits splice size
|
||||
// - Chain traversal with safety (breaks on NULL)
|
||||
//
|
||||
// Performance: ~5 cycles + O(count) for chain traversal
|
||||
static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t count, uint32_t capacity) {
|
||||
// CRITICAL: C7 (1KB) is headerless - MUST NOT splice to TLS SLL
|
||||
if (__builtin_expect(class_idx == 7, 0)) {
|
||||
return 0; // C7 rejected
|
||||
}
|
||||
|
||||
// Calculate available capacity
|
||||
uint32_t available = (capacity > g_tls_sll_count[class_idx])
|
||||
? (capacity - g_tls_sll_count[class_idx]) : 0;
|
||||
if (available == 0 || count == 0 || !chain_head) {
|
||||
return 0; // No space or empty chain
|
||||
}
|
||||
|
||||
// Limit splice size to available capacity
|
||||
uint32_t to_move = (count < available) ? count : available;
|
||||
|
||||
// Find chain tail (traverse to_move - 1 nodes)
|
||||
void* tail = chain_head;
|
||||
for (uint32_t i = 1; i < to_move; i++) {
|
||||
void* next = *(void**)tail;
|
||||
if (!next) {
|
||||
// Chain shorter than expected, adjust to_move
|
||||
to_move = i;
|
||||
break;
|
||||
}
|
||||
tail = next;
|
||||
}
|
||||
|
||||
// Splice chain to SLL head
|
||||
*(void**)tail = g_tls_sll_head[class_idx];
|
||||
g_tls_sll_head[class_idx] = chain_head;
|
||||
g_tls_sll_count[class_idx] += to_move;
|
||||
|
||||
return to_move;
|
||||
}
|
||||
|
||||
// ========== Debug/Stats (optional) ==========
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Verify C7 is not in SLL (debug only, call at safe points)
|
||||
static inline void tls_sll_verify_no_c7(void) {
|
||||
void* head = g_tls_sll_head[7];
|
||||
if (head != NULL) {
|
||||
fprintf(stderr, "[TLS_SLL_BUG] C7 found in TLS SLL! head=%p count=%u\n",
|
||||
head, g_tls_sll_count[7]);
|
||||
fprintf(stderr, "[TLS_SLL_BUG] This should NEVER happen - C7 is headerless!\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TLS_SLL_BOX_H
|
||||
Reference in New Issue
Block a user