From c1c45106da49f6178141495dae85a99a5e90c15b Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Thu, 4 Dec 2025 18:53:04 +0900 Subject: [PATCH] Two-Speed HOT PATH: Guard hak_super_lookup calls with HAKMEM_BUILD_RELEASE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase E2 introduced registry lookup to the hot path, causing 84-88% regression (70M → 9M ops/sec). This commit restores performance by guarding expensive hak_super_lookup calls (50-100 cycles each) with conditional compilation. Key changes: - tls_sll_box.h push: Full validation in Debug, ss_fast_lookup (O(1)) in Release - tls_sll_box.h pop: Registry validation in Debug, trust list structure in Release - tiny_free_fast_v2.inc.h: Header/meta cross-check Debug-only - malloc_tiny_fast.h: SuperSlab registration check Debug-only Performance improvement: - Release build: 2.9M → 87-88M ops/sec (30x improvement) - Restored to historical UNIFIED-HEADER peak (70-80M range) Release builds trust: - Header magic (0xA0) as sufficient allocation origin validation - TLS SLL linked list structure integrity - Header-based class_idx classification Debug builds maintain full validation with expensive registry lookups. 🤖 Generated with Claude Code Co-Authored-By: Claude --- core/box/tls_sll_box.h | 15 +++++++++++++-- core/front/malloc_tiny_fast.h | 4 ++++ core/tiny_free_fast_v2.inc.h | 4 ++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/core/box/tls_sll_box.h b/core/box/tls_sll_box.h index 4bb5ead2..11524a33 100644 --- a/core/box/tls_sll_box.h +++ b/core/box/tls_sll_box.h @@ -606,9 +606,12 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t ptr = tls_sll_normalize_base(class_idx, ptr); void* raw_ptr = HAK_BASE_TO_RAW(ptr); - // Detect meta/class mismatch on push (first few only). - bool push_valid = true; + // TWO-SPEED: Full validation with hak_super_lookup is DEBUG-ONLY. + // Release builds use ss_fast_lookup (O(1) mask arithmetic) for pinning only. SuperSlab* ss_ptr = NULL; +#if !HAKMEM_BUILD_RELEASE + // Debug: Full validation with expensive registry lookup + bool push_valid = true; do { static _Atomic uint32_t g_tls_sll_push_meta_mis = 0; struct SuperSlab* ss = hak_super_lookup(raw_ptr); @@ -652,6 +655,10 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t if (!push_valid) { return false; // Drop malformed pointer instead of corrupting TLS SLL } +#else + // Release: Use fast O(1) lookup for pinning (no validation overhead) + ss_ptr = ss_fast_lookup(raw_ptr); +#endif // !HAKMEM_BUILD_RELEASE #if HAKMEM_TINY_HEADER_CLASSIDX // Validate header on push - detect blocks pushed without header write @@ -1075,6 +1082,9 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch } } while (0); + // TWO-SPEED: Next pointer validation is DEBUG-ONLY to keep HOT PATH fast. + // In Release builds, we trust the linked list structure. +#if !HAKMEM_BUILD_RELEASE // Validate next pointer before installing as new head. if (!hak_base_is_null(next)) { SuperSlab* next_ss = hak_super_lookup(raw_next); @@ -1102,6 +1112,7 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch g_tls_sll[class_idx].count = 0; } } +#endif // !HAKMEM_BUILD_RELEASE #if !HAKMEM_BUILD_RELEASE if (!hak_base_is_null(next) && !validate_ptr_range(raw_next, "tls_sll_pop_next")) { diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h index 4532d946..4e6e0593 100644 --- a/core/front/malloc_tiny_fast.h +++ b/core/front/malloc_tiny_fast.h @@ -155,11 +155,15 @@ static inline int free_tiny_fast(void* ptr) { // 4. BASE を計算して Unified Cache に push void* base = (void*)((char*)ptr - 1); + // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast. + // In Release builds, we trust header magic (0xA0) as sufficient validation. +#if !HAKMEM_BUILD_RELEASE // 5. Superslab 登録確認(誤分類防止) SuperSlab* ss_guard = hak_super_lookup(ptr); if (__builtin_expect(!(ss_guard && ss_guard->magic == SUPERSLAB_MAGIC), 0)) { return 0; // hakmem 管理外 → 通常 free 経路へ } +#endif // !HAKMEM_BUILD_RELEASE // Cross-thread free detection (Larson MT crash fix, ENV gated) { diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h index 80d6ba1e..6311f7d9 100644 --- a/core/tiny_free_fast_v2.inc.h +++ b/core/tiny_free_fast_v2.inc.h @@ -162,6 +162,9 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { fprintf(stderr, "[TINY_FREE_V2] After read_header, class_idx=%d\n", class_idx); } #endif + // TWO-SPEED: Header/meta cross-check is DEBUG-ONLY to keep HOT PATH fast. + // In Release builds, we trust the header-based classification. +#if !HAKMEM_BUILD_RELEASE // Cross-check header class vs meta class (if available from fast lookup) do { // Try fast owner slab lookup to get meta->class_idx for comparison @@ -191,6 +194,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { } } } while (0); +#endif // !HAKMEM_BUILD_RELEASE // Check if header read failed (invalid magic in debug, or out-of-bounds class_idx) if (__builtin_expect(class_idx < 0, 0)) {