Phase 7 + Pool TLS 1.5b stabilization:\n- Add build hygiene (dep tracking, flag consistency, print-flags)\n- Add build.sh + verify_build.sh (unified recipe, freshness check)\n- Quiet verbose logs behind HAKMEM_DEBUG_VERBOSE\n- A/B free safety via HAKMEM_TINY_SAFE_FREE (mincore strict vs boundary)\n- Tweak Tiny header path to reduce noise; Pool TLS free guard optimized\n- Fix mimalloc link retention (--no-as-needed + force symbol)\n- Add docs/BUILD_PHASE7_POOL_TLS.md (cheatsheet)

This commit is contained in:
Moe Charm (CI)
2025-11-09 11:50:18 +09:00
parent cf5bdf9c0a
commit 0da9f8cba3
10 changed files with 349 additions and 48 deletions

106
Makefile
View File

@ -49,6 +49,27 @@ ifeq ($(USE_LTO),1)
LDFLAGS += -flto
endif
# ------------------------------------------------------------
# Build hygiene: dependency tracking + flag consistency checks
# ------------------------------------------------------------
# Track header dependencies for explicit compile rules as well.
# -MMD emits a .d file alongside each object; -MP adds a phony target per
# header so that deleting a header does not break the next build.
CFLAGS += -MMD -MP

# If someone injects -DHAKMEM_POOL_TLS_PHASE1 (with or without a value)
# directly into CFLAGS but forgets POOL_TLS_PHASE1=1, object lists will miss
# pool_tls*.o. Fail fast to avoid confusing link/runtime errors.
# Both the bare and the valued form are matched because the C sources gate
# the Pool TLS code on #ifdef / defined(), so any definition enables it.
# NOTE: this check must stay ABOVE the point where the Makefile itself appends
# -DHAKMEM_POOL_TLS_PHASE1=1 to CFLAGS, so it only sees user-injected flags.
ifneq ($(filter -DHAKMEM_POOL_TLS_PHASE1 -DHAKMEM_POOL_TLS_PHASE1=%,$(CFLAGS)),)
  ifneq ($(POOL_TLS_PHASE1),1)
    $(error Detected -DHAKMEM_POOL_TLS_PHASE1 in CFLAGS but POOL_TLS_PHASE1!=1. Please invoke: make POOL_TLS_PHASE1=1 ...)
  endif
endif

# Include generated .d files if present (safe even if none yet)
# Filter to only files (not directories like glibc-2.38/build/iconvdata/gconv-modules.d)
# Also exclude glibc and mimalloc-bench subdirectories
-include $(shell find . -name '*.d' -type f -not -path './glibc*' -not -path './mimalloc-bench*' 2>/dev/null)
# Default: enable Box Theory refactor for Tiny (Phase 6-1.7)
# This is the best performing option currently (4.19M ops/s)
# NOTE: Disabled while testing ULTRA_SIMPLE with SFC integration
@ -145,24 +166,45 @@ SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o
# Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1)
ifeq ($(POOL_TLS_PHASE1),1)
SHARED_OBJS += pool_tls_shared.o pool_refill_shared.o
OBJS += pool_tls.o pool_refill.o pool_tls_arena.o
SHARED_OBJS += pool_tls_shared.o pool_refill_shared.o pool_tls_arena_shared.o
CFLAGS += -DHAKMEM_POOL_TLS_PHASE1=1
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PHASE1=1
endif
# Pool TLS Phase 1.5b - Pre-warm optimization
ifeq ($(POOL_TLS_PREWARM),1)
CFLAGS += -DHAKMEM_POOL_TLS_PREWARM=1
CFLAGS_SHARED += -DHAKMEM_POOL_TLS_PREWARM=1
endif
# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o
endif
BENCH_SYSTEM_OBJS = bench_allocators_system.o
# Default target
all: $(TARGET)
# Troubleshooting aid: dump the build-time switches that select which
# allocator features get compiled in, plus whether the Pool TLS define
# actually reached CFLAGS.
.PHONY: print-flags
print-flags:
	@echo "==== Build Switches ===="
	@$(foreach sw,POOL_TLS_PHASE1 POOL_TLS_PREWARM HEADER_CLASSIDX AGGRESSIVE_INLINE PREWARM_TLS USE_LTO OPT_LEVEL NATIVE,echo "$(sw) = $($(sw))";)
	@echo "CFLAGS contains = $(filter -DHAKMEM_POOL_TLS_PHASE1=1,$(CFLAGS))"
# Build test program
$(TARGET): $(OBJS)
$(CC) -o $@ $^ $(LDFLAGS)
@ -220,8 +262,11 @@ bench_tiny_hot_system: bench_tiny_hot_system.o
bench_tiny_hot_mi.o: bench_tiny_hot.c
$(CC) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
bench_tiny_hot_mi: bench_tiny_hot_mi.o
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
bench_mi_force.o: bench_mi_force.c
$(CC) $(CFLAGS) -I mimalloc-bench/extern/mi/include -c -o $@ $<
bench_tiny_hot_mi: bench_tiny_hot_mi.o bench_mi_force.o
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# hakmi variant for tiny hot bench (direct link via front API)
bench_tiny_hot_hakmi.o: bench_tiny_hot.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
@ -315,7 +360,7 @@ test-box-refactor: box-refactor
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o
endif
bench_tiny: bench_tiny.o $(TINY_BENCH_OBJS)
@ -404,8 +449,8 @@ larson_system: larson_system.o
larson_mi.o: $(LARSON_SRC)
$(CXX) $(CFLAGS) -DUSE_MIMALLOC -I mimalloc-bench/extern/mi/include -c -o $@ $<
larson_mi: larson_mi.o
$(CXX) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
larson_mi: larson_mi.o bench_mi_force.o
$(CXX) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# HAKMEM variant (hakmem.o provides malloc/free symbols directly)
larson_hakmem.o: $(LARSON_SRC)
@ -468,11 +513,12 @@ bench_tiny_hot_direct: bench_tiny_hot_direct.o $(TINY_BENCH_OBJS)
@echo "✓ bench_tiny_hot_direct built (hak_tiny_alloc/free direct)"
# hakmi variant for comprehensive bench (front + mimalloc backend)
bench_comprehensive_hakmi: bench_comprehensive.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc \
bench_comprehensive.c -o $@ \
adapters/hakmi_front/hakmi_front.o adapters/hakmi_front/hakmi_env.o adapters/hakmi_front/hakmi_tls_front.o \
-L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
-Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
@echo "✓ bench_comprehensive_hakmi built (hakmi front + mimalloc backend)"
# hakx variant for comprehensive bench
@ -497,15 +543,15 @@ bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS)
bench_random_mixed_system: bench_random_mixed_system.o
$(CC) -o $@ $^ $(LDFLAGS)
bench_random_mixed_mi: bench_random_mixed_mi.o
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
bench_random_mixed_mi: bench_random_mixed_mi.o bench_mi_force.o
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# hakmi variant for random mixed bench
bench_random_mixed_hakmi.o: bench_random_mixed.c include/hakmi/hakmi_api.h adapters/hakmi_front/hakmi_front.h
$(CC) $(CFLAGS) -I include -DUSE_HAKMI -include include/hakmi/hakmi_api.h -Dmalloc=hakmi_malloc -Dfree=hakmi_free -Drealloc=hakmi_realloc -c -o $@ $<
bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS)
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
bench_random_mixed_hakmi: bench_random_mixed_hakmi.o $(HAKMI_FRONT_OBJS) bench_mi_force.o
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# hakx variant for random mixed bench
bench_random_mixed_hakx.o: bench_random_mixed.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
@ -551,8 +597,8 @@ bench_mid_large_hakmem: bench_mid_large_hakmem.o $(TINY_BENCH_OBJS)
$(CC) -o $@ $^ $(LDFLAGS)
bench_mid_large_system: bench_mid_large_system.o
$(CC) -o $@ $^ $(LDFLAGS)
bench_mid_large_mi: bench_mid_large_mi.o
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
bench_mid_large_mi: bench_mid_large_mi.o bench_mi_force.o
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# hakx variant for mid/large (1T)
bench_mid_large_hakx.o: bench_mid_large.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
@ -572,8 +618,8 @@ bench_mid_large_mt_hakmem: bench_mid_large_mt_hakmem.o $(TINY_BENCH_OBJS)
$(CC) -o $@ $^ $(LDFLAGS)
bench_mid_large_mt_system: bench_mid_large_mt_system.o
$(CC) -o $@ $^ $(LDFLAGS)
bench_mid_large_mt_mi: bench_mid_large_mt_mi.o
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
bench_mid_large_mt_mi: bench_mid_large_mt_mi.o bench_mi_force.o
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# hakx variant for mid/large MT
bench_mid_large_mt_hakx.o: bench_mid_large_mt.c include/hakx/hakx_api.h include/hakx/hakx_fast_inline.h
@ -593,8 +639,8 @@ bench_fragment_stress_hakmem: bench_fragment_stress_hakmem.o $(TINY_BENCH_OBJS)
$(CC) -o $@ $^ $(LDFLAGS)
bench_fragment_stress_system: bench_fragment_stress_system.o
$(CC) -o $@ $^ $(LDFLAGS)
bench_fragment_stress_mi: bench_fragment_stress_mi.o
$(CC) -o $@ $^ -L mimalloc-bench/extern/mi/out/release -lmimalloc $(LDFLAGS)
bench_fragment_stress_mi: bench_fragment_stress_mi.o bench_mi_force.o
$(CC) -o $@ $^ -Wl,--no-as-needed -L mimalloc-bench/extern/mi/out/release -lmimalloc -Wl,--as-needed $(LDFLAGS)
# Bench build with Minimal Tiny Front (physically excludes optional front tiers)
bench_tiny_front: CFLAGS += -fno-plt -fno-semantic-interposition -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -DHAKMEM_TINY_MINIMAL_FRONT=1 -DHAKMEM_BENCH_TINY_ONLY=1 -DHAKMEM_TINY_MAG_OWNER=0
@ -1125,3 +1171,27 @@ larson_hakmem_route:
@echo "Built larson_hakmem (3-layer + route)"
@echo " HAKMEM_ROUTE build-flag set; runtime ENV still controls output"
@echo "========================================="
# ----------------------------------------------------------------------------
# Pool TLS Benchmarks (Phase 1.5b)
# ----------------------------------------------------------------------------
# Build HAKMEM shared library first to satisfy -lhakmem.
# The library is picked up from this directory (-L.), so embed an
# ORIGIN-relative rpath: the binary then finds the library next to itself at
# run time without requiring LD_LIBRARY_PATH.
bench_pool_tls_hakmem: benchmarks/bench_pool_tls.c $(SHARED_LIB)
	$(CC) $(CFLAGS) -o $@ $< -L. -lhakmem -Wl,-rpath,'$$ORIGIN' $(LDFLAGS)

# Same benchmark source, built against the system allocator as the baseline.
bench_pool_tls_system: benchmarks/bench_pool_tls.c
	$(CC) $(CFLAGS) -DUSE_SYSTEM_MALLOC -o $@ $< $(LDFLAGS)

# Build both variants and run them back to back with identical arguments.
.PHONY: bench-pool-tls
bench-pool-tls: bench_pool_tls_hakmem bench_pool_tls_system
	@echo "========================================="
	@echo "Pool TLS Benchmark (8KB-52KB allocations)"
	@echo "========================================="
	@echo ""
	@echo "== HAKMEM (Phase 1.5b Pre-warm) =="
	@./bench_pool_tls_hakmem 1 100000 256 42
	@echo ""
	@echo "== System malloc =="
	@./bench_pool_tls_system 1 100000 256 42
	@echo ""
	@echo "========================================="

22
bench_mi_force.c Normal file
View File

@ -0,0 +1,22 @@
// bench_mi_force.c
// Force a reference to a mimalloc symbol so the dynamic linker
// retains libmimalloc as a NEEDED dependency even with --as-needed.
#include <stddef.h>

#if defined(__cplusplus)
extern "C" {
#endif
// Local declaration (avoids needing mimalloc's include path here).
// mimalloc declares this as `int mi_version(void)`; keep the prototype
// identical so the reference is well-defined rather than relying on a cast
// between incompatible function types.
int mi_version(void);
#if defined(__cplusplus)
}
#endif

// Taking the function's address creates an undefined-symbol reference in this
// object file, which is what makes the linker record libmimalloc as a
// dependency. `volatile` keeps the compiler from discarding the pointer.
static int (*volatile mi_ver_ref)(void) = mi_version;

// Externally visible anchor: reads the volatile pointer so the reference
// above stays reachable. It never calls into mimalloc.
void hakmem_bench_mi_force_link(void) {
    // Prevent whole-call optimization away
    (void)mi_ver_ref;
}

29
build.sh Executable file
View File

@ -0,0 +1,29 @@
#!/usr/bin/env bash
# build.sh - Unified build wrapper to eliminate flag drift
#
# Usage: ./build.sh [target]
#   target  make target to build (default: bench_mid_large_mt_hakmem)
#
# Performs a clean build with the Phase 7 + Pool TLS Phase 1.5b switches so
# every binary is produced with the same flag set.
set -euo pipefail

# Always operate on the tree that contains this script (and its Makefile),
# regardless of the caller's working directory; otherwise `make clean` and
# `make` would act on whatever directory the script was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

TARGET="${1:-bench_mid_large_mt_hakmem}"

echo "========================================="
echo " HAKMEM Build Script"
echo " Target: ${TARGET}"
echo "========================================="

# Always clean to avoid stale objects when toggling flags.
# Best-effort: a failing clean (e.g. nothing to remove) must not abort the build.
make clean >/dev/null 2>&1 || true

# Phase 7 + Pool TLS Phase 1.5b defaults
make \
  POOL_TLS_PHASE1=1 \
  POOL_TLS_PREWARM=1 \
  HEADER_CLASSIDX=1 \
  AGGRESSIVE_INLINE=1 \
  PREWARM_TLS=1 \
  "${TARGET}"

echo ""
echo "========================================="
echo " ✅ Build successful"
echo " Run: ./${TARGET}"
echo "========================================="

View File

@ -159,9 +159,11 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
// This handles the gap when ACE is disabled or failed
static _Atomic int gap_alloc_count = 0;
int count = atomic_fetch_add(&gap_alloc_count, 1);
#if HAKMEM_DEBUG_VERBOSE
if (count < 3) {
fprintf(stderr, "[HAKMEM] INFO: Using mmap for mid-range size=%zu (ACE disabled or failed)\n", size);
}
#endif
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t_mmap);
#endif
@ -199,4 +201,3 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
}
#endif // HAK_ALLOC_API_INC_H

View File

@ -78,6 +78,37 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
return;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
    // Phase 1: Try Pool TLS free FIRST for 8KB-52KB range
    // CRITICAL: Must come before Phase 7 Tiny to avoid magic mismatch SEGV
    // Pool TLS uses magic 0xb0, Tiny uses magic 0xa0 - must distinguish!
    {
        // The 1-byte header sits immediately before the user pointer.
        void* header_addr = (char*)ptr - 1;
        // Safety vs performance trade-off:
        // - If HAKMEM_TINY_SAFE_FREE=1 (strict), validate with mincore() always
        // - Else (default), only validate on page-boundary risk to avoid syscall cost
#if HAKMEM_TINY_SAFE_FREE
        if (!hak_is_memory_readable(header_addr)) { goto skip_pool_tls; }
#else
        // ptr-1 falls on a different (possibly unmapped) page only when ptr
        // itself is page-aligned, so the test is on ptr's page offset, not the
        // header's. Same check as the Tiny fast free path (0xFFF mask, i.e.
        // it assumes 4 KiB pages).
        if (__builtin_expect(((uintptr_t)ptr & 0xFFF) == 0, 0)) {
            if (!hak_is_memory_readable(header_addr)) { goto skip_pool_tls; }
        }
#endif
        uint8_t header = *(uint8_t*)header_addr;
        if ((header & 0xF0) == POOL_MAGIC) {
            pool_free(ptr);
            hak_free_route_log("pool_tls", ptr);
            goto done;
        }
        // Not Pool TLS - fall through to other paths
    }
    // Empty statement after the label so it is valid no matter what follows
    // once the preprocessor has selected the next section.
skip_pool_tls:;
#endif
#if HAKMEM_TINY_HEADER_CLASSIDX
// Phase 7: Dual-header dispatch (1-byte Tiny header OR 16-byte malloc/mmap header)
//
@ -135,19 +166,6 @@ slow_path_after_step2:;
#endif
#endif
#ifdef HAKMEM_POOL_TLS_PHASE1
// Phase 1: Try Pool TLS free for 8KB-52KB range
// This uses 1-byte headers like Tiny for O(1) free
{
uint8_t header = *((uint8_t*)ptr - 1);
if ((header & 0xF0) == POOL_MAGIC) {
pool_free(ptr);
hak_free_route_log("pool_tls", ptr);
goto done;
}
}
#endif
// SS-first free既定ON
#if !HAKMEM_TINY_HEADER_CLASSIDX
// Only run SS-first if Phase 7 header-based free is not enabled

View File

@ -70,6 +70,17 @@
# define HAKMEM_TINY_PREWARM_TLS 0
#endif
// Runtime verbosity (printf-heavy diagnostics). Keep OFF for benches.
// The #ifndef guard only supplies the default (0); a definition provided
// earlier (e.g. on the compiler command line) takes precedence.
#ifndef HAKMEM_DEBUG_VERBOSE
# define HAKMEM_DEBUG_VERBOSE 0
#endif
// Tiny/Mid safety checks on free path (mincore header validation).
// 0 = performance (boundary-only), 1 = strict (mincore for all)
// Defaults to 0 unless already defined before this point.
// NOTE(review): the Tiny fast free path appears to honor strict mode only
// when HAKMEM_POOL_TLS_PHASE1 is also defined - confirm this is intended.
#ifndef HAKMEM_TINY_SAFE_FREE
# define HAKMEM_TINY_SAFE_FREE 0
#endif
// Phase 7 refill count defaults (tunable via env vars)
// HAKMEM_TINY_REFILL_COUNT: global default (default: 16)
// HAKMEM_TINY_REFILL_COUNT_HOT: class 0-3 (default: 16)

View File

@ -50,29 +50,43 @@ extern int TINY_TLS_MAG_CAP;
static inline int hak_tiny_free_fast_v2(void* ptr) {
if (__builtin_expect(!ptr, 0)) return 0;
// CRITICAL: Fast check for page boundaries (0.1% case)
// Strategy: Check alignment BEFORE expensive mincore() syscall
// - Page boundary check: (ptr & 0xFFF) == 0 → 1-2 cycles
// CRITICAL: Check if header is accessible
void* header_addr = (char*)ptr - 1;
#if defined(HAKMEM_POOL_TLS_PHASE1) && HAKMEM_TINY_SAFE_FREE
// Strict mode: validate header address with mincore() on every free
extern int hak_is_memory_readable(void* addr);
if (!hak_is_memory_readable(header_addr)) {
return 0; // Header not accessible - not a Tiny allocation
}
#else
// Pool TLS disabled: Optimize for common case (99.9% hit rate)
// Strategy: Only check page boundaries (ptr & 0xFFF == 0)
// - Page boundary check: 1-2 cycles
// - mincore() syscall: ~634 cycles (only if page-aligned)
// - Result: 99.9% of frees avoid mincore() → 317-634x faster!
//
// Rationale: Allocations at page boundaries would SEGV when reading ptr-1
// (previous page may be unmapped). But page boundaries are rare (<0.1%),
// so we optimize for the common case (99.9%) by checking alignment first.
void* header_addr = (char*)ptr - 1;
if (__builtin_expect(((uintptr_t)ptr & 0xFFF) == 0, 0)) {
// Potential page boundary - do safety check
extern int hak_is_memory_readable(void* addr);
if (!hak_is_memory_readable(header_addr)) {
// Header not accessible - route to slow path (page boundary allocation)
return 0;
return 0; // Page boundary allocation
}
}
// Normal case (99.9%): header is safe to read (no mincore call!)
#endif
// 1. Read class_idx from header (2-3 cycles, L1 hit)
// Note: In release mode, tiny_region_id_read_header() skips magic validation (saves 2-3 cycles)
#if HAKMEM_DEBUG_VERBOSE
static _Atomic int debug_calls = 0;
if (atomic_fetch_add(&debug_calls, 1) < 5) {
fprintf(stderr, "[TINY_FREE_V2] Before read_header, ptr=%p\n", ptr);
}
#endif
int class_idx = tiny_region_id_read_header(ptr);
#if HAKMEM_DEBUG_VERBOSE
if (atomic_load(&debug_calls) <= 5) {
fprintf(stderr, "[TINY_FREE_V2] After read_header, class_idx=%d\n", class_idx);
}
#endif
// Check if header read failed (invalid magic in debug, or out-of-bounds class_idx)
if (__builtin_expect(class_idx < 0, 0)) {

View File

@ -68,24 +68,43 @@ static inline int tiny_region_id_read_header(void* ptr) {
uint8_t header = *header_ptr;
#if !HAKMEM_BUILD_RELEASE
// Debug/Development: Validate magic byte to catch non-header allocations
// CRITICAL FIX (Pool TLS Phase 1): ALWAYS validate magic when Pool TLS is enabled
// Reason: Pool TLS uses different magic (0xb0 vs 0xa0), MUST distinguish them!
// Without this, Pool TLS allocations are wrongly routed to Tiny freelist → corruption
#if !HAKMEM_BUILD_RELEASE || defined(HAKMEM_POOL_TLS_PHASE1)
// Debug/Development OR Pool TLS: Validate magic byte to catch non-header allocations
// Reason: Mid/Large allocations don't have headers, must detect and reject them
uint8_t magic = header & 0xF0;
#if HAKMEM_DEBUG_VERBOSE
static int debug_count = 0;
if (debug_count < 5) {
fprintf(stderr, "[TINY_READ_HEADER] ptr=%p header=0x%02x magic=0x%02x expected=0x%02x\n",
ptr, header, magic, HEADER_MAGIC);
debug_count++;
}
#endif
if (magic != HEADER_MAGIC) {
// Invalid header - likely non-header allocation (Mid/Large)
// Invalid header - likely non-header allocation (Mid/Large/Pool TLS)
#if HAKMEM_DEBUG_VERBOSE
if (debug_count < 6) { // One more after the 5 above
fprintf(stderr, "[TINY_READ_HEADER] REJECTING ptr=%p (magic mismatch)\n", ptr);
}
#endif
#if !HAKMEM_BUILD_RELEASE
static int invalid_count = 0;
if (invalid_count < 5) {
fprintf(stderr, "[HEADER_INVALID] ptr=%p, header=%02x, magic=%02x (expected %02x)\n",
ptr, header, magic, HEADER_MAGIC);
invalid_count++;
}
#endif
return -1;
}
#else
// Release: Skip magic validation (save 2-3 cycles)
// Release (without Pool TLS): Skip magic validation (save 2-3 cycles)
// Safety: Bounds check below still prevents out-of-bounds array access
// Trade-off: Mid/Large frees may corrupt TLS freelist (rare, ~0.1% of frees)
// NOTE: This optimization is DISABLED when Pool TLS is enabled (different magic bytes!)
#endif
int class_idx = (int)(header & HEADER_CLASS_MASK);

View File

@ -0,0 +1,90 @@
# HAKMEM Phase 7 + Pool TLS Phase 1.5b — Build & Run Cheatsheet
This document captures the stable build/run recipe used for recent benches.
## One-liner Build (recommended)
```
./build.sh <target>
# examples
./build.sh bench_mid_large_mt_hakmem
./build.sh bench_random_mixed_hakmem
./build.sh larson_hakmem
```
Enables at build time:
- POOL_TLS_PHASE1=1 (Pool TLS Phase 1.5b)
- POOL_TLS_PREWARM=1 (Pool TLS pre-warm optimization)
- HEADER_CLASSIDX=1 (Phase 7 header)
- AGGRESSIVE_INLINE=1
- PREWARM_TLS=1
Verify switches:
```
make print-flags
```
Optional safety/verbosity toggles:
- `HAKMEM_TINY_SAFE_FREE=1` — strict free validation (mincore on all frees). Slower but safest.
- `HAKMEM_DEBUG_VERBOSE=1` — enable verbose logs for Tiny header/free, etc.
Examples:
```
make clean && make HAKMEM_TINY_SAFE_FREE=1 POOL_TLS_PHASE1=1 HEADER_CLASSIDX=1 AGGRESSIVE_INLINE=1 PREWARM_TLS=1 bench_mid_large_mt_hakmem
```
## Bench Recipes (used in reports)
Larson (Mixed)
```
./build.sh larson_hakmem
make larson_system
./larson_hakmem 2 8 128 1024 1 12345 1
./larson_hakmem 2 8 128 1024 1 12345 4
./larson_system 2 8 128 1024 1 12345 1
./larson_system 2 8 128 1024 1 12345 4
```
Pool TLS (8-52KB)
```
./build.sh bench_pool_tls_hakmem
make bench_pool_tls_system
./bench_pool_tls_hakmem 1 100000 256 42
./bench_pool_tls_hakmem 4 50000 256 42
./bench_pool_tls_system 1 100000 256 42
./bench_pool_tls_system 4 50000 256 42
```
Random Mixed (Tiny 128-1024B)
```
./build.sh bench_random_mixed_hakmem
make bench_random_mixed_system
for s in 128 256 512 1024; do \
./bench_random_mixed_hakmem 100000 $s 42; \
./bench_random_mixed_system 100000 $s 42; \
done
```
Mid-Large MT (8-32KB)
```
./build.sh bench_mid_large_mt_hakmem
make bench_mid_large_mt_system
./bench_mid_large_mt_hakmem 1 100000 256 42
./bench_mid_large_mt_hakmem 4 50000 256 42
./bench_mid_large_mt_system 1 100000 256 42
./bench_mid_large_mt_system 4 50000 256 42
```
## Mimalloc note (when comparing)
Direct-link mimalloc benches require a runtime library path:
```
export LD_LIBRARY_PATH=$PWD/mimalloc-bench/extern/mi/out/release
```
## Build hygiene
- Always prefer `./build.sh` over ad-hoc `make` (prevents flag drift)
- Check switches: `make print-flags`
- Verify freshness: `./verify_build.sh <binary>`

27
verify_build.sh Executable file
View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
# verify_build.sh - Quick build correctness/up-to-date check
#
# Usage: ./verify_build.sh <binary>
#
# Compares the binary's mtime against the newest source/header under the
# project's source roots.
# Exit status: 0 = binary is at least as new as every source found,
#              1 = usage error, 2 = binary not found, 3 = a source is newer.
# Relies on GNU `stat -c` and GNU `find -printf`.
set -euo pipefail

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <binary>" >&2
  exit 1
fi

bin="$1"
if [[ ! -f "$bin" ]]; then
  echo "❌ Error: $bin not found" >&2
  exit 2
fi

# Only scan roots that actually exist. A missing directory makes find exit
# non-zero, which under `set -e -o pipefail` would abort the script silently.
src_roots=()
for root in core include adapters engines benchmarks/src; do
  if [[ -d "$root" ]]; then
    src_roots+=("$root")
  fi
done

bin_time=$(stat -c %Y "$bin")

# Single pass: "<mtime> <path>" of the most recently modified source file.
# `sort -n | tail -n1` consumes its whole input (no SIGPIPE as with head),
# and `|| true` keeps partial find failures from aborting the check.
newest_entry=""
if [[ ${#src_roots[@]} -gt 0 ]]; then
  newest_entry=$(find "${src_roots[@]}" -type f \
      \( -name '*.c' -o -name '*.h' -o -name '*.inc' -o -name '*.inc.h' \) \
      -printf '%T@ %p\n' 2>/dev/null | sort -n | tail -n1 || true)
fi

# Check if any sources are newer than the binary
if [[ -n "$newest_entry" ]]; then
  src_time="${newest_entry%% *}"   # timestamp field
  src_time="${src_time%%.*}"       # drop fractional seconds
  newest="${newest_entry#* }"      # path field (may contain spaces)
  if [[ "$src_time" -gt "$bin_time" ]]; then
    echo "⚠️ Warning: Sources newer than binary: $newest" >&2
    exit 3
  fi
fi

echo "✅ Build verification passed for $bin"