2025-11-14 01:02:00 +09:00
|
|
|
// tls_sll_box.h - Box TLS-SLL: Single-Linked List API (Unified Box version)
|
2025-11-10 16:48:20 +09:00
|
|
|
//
|
2025-11-14 01:02:00 +09:00
|
|
|
// Goal:
|
|
|
|
|
// - Single authoritative Box for TLS SLL operations.
|
|
|
|
|
// - All next pointer layout is decided by tiny_next_ptr_box.h (Box API).
|
|
|
|
|
// - Callers pass BASE pointers only; no local next_offset arithmetic.
|
|
|
|
|
// - Compatible with existing ptr_trace PTR_NEXT_* macros (off is logging-only).
|
2025-11-10 16:48:20 +09:00
|
|
|
//
|
2025-11-14 01:02:00 +09:00
|
|
|
// Invariants:
|
|
|
|
|
// - g_tiny_class_sizes[cls] is TOTAL stride (including 1-byte header when enabled).
|
|
|
|
|
// - For HEADER_CLASSIDX != 0, tiny_nextptr.h encodes:
|
|
|
|
|
// class 0: next_off = 0
|
2025-11-21 23:00:24 +09:00
|
|
|
// class 1-7: next_off = 1
|
2025-11-14 01:02:00 +09:00
|
|
|
// Callers MUST NOT duplicate this logic.
|
|
|
|
|
// - TLS SLL stores BASE pointers only.
|
|
|
|
|
// - Box provides: push / pop / splice with capacity & integrity checks.
|
2025-11-10 16:48:20 +09:00
|
|
|
|
|
|
|
|
#ifndef TLS_SLL_BOX_H
|
|
|
|
|
#define TLS_SLL_BOX_H
|
|
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdbool.h>
|
2025-11-14 01:02:00 +09:00
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
2025-11-21 23:00:24 +09:00
|
|
|
#include <stdatomic.h>
|
2025-11-14 01:02:00 +09:00
|
|
|
|
|
|
|
|
#include "../hakmem_tiny_config.h"
|
2025-11-10 23:41:53 +09:00
|
|
|
#include "../hakmem_build_flags.h"
|
2025-11-14 01:02:00 +09:00
|
|
|
#include "../tiny_remote.h"
|
|
|
|
|
#include "../tiny_region_id.h"
|
|
|
|
|
#include "../hakmem_tiny_integrity.h"
|
|
|
|
|
#include "../ptr_track.h"
|
|
|
|
|
#include "../ptr_trace.h"
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
#include "../tiny_debug_ring.h"
|
2025-11-21 23:00:24 +09:00
|
|
|
#include "../hakmem_super_registry.h"
|
|
|
|
|
#include "../superslab/superslab_inline.h"
|
2025-11-14 01:02:00 +09:00
|
|
|
#include "tiny_next_ptr_box.h"
|
|
|
|
|
|
2025-11-21 23:00:24 +09:00
|
|
|
// Per-thread debug shadow: last successful push base per class (release-safe)
|
|
|
|
|
static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
|
|
|
|
|
|
2025-11-22 11:30:46 +09:00
|
|
|
// Per-thread callsite tracking: last push caller per class (debug-only)
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
static __thread const char* s_tls_sll_last_push_from[TINY_NUM_CLASSES] = {NULL};
|
|
|
|
|
static __thread const char* s_tls_sll_last_pop_from[TINY_NUM_CLASSES] = {NULL};
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-20 07:32:30 +09:00
|
|
|
// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
|
|
|
|
|
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
2025-11-21 23:00:24 +09:00
|
|
|
extern __thread uint64_t g_tls_canary_before_sll;
|
|
|
|
|
extern __thread uint64_t g_tls_canary_after_sll;
|
|
|
|
|
extern __thread const char* g_tls_sll_last_writer[TINY_NUM_CLASSES];
|
2025-11-14 01:05:30 +09:00
|
|
|
extern int g_tls_sll_class_mask; // bit i=1 → SLL allowed for class i
|
2025-11-14 01:02:00 +09:00
|
|
|
|
|
|
|
|
// ========== Debug guard ==========
|
2025-11-10 23:41:53 +09:00
|
|
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-14 01:02:00 +09:00
|
|
|
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where)
|
|
|
|
|
{
|
|
|
|
|
(void)class_idx;
|
2025-11-10 23:41:53 +09:00
|
|
|
if ((uintptr_t)base < 4096) {
|
2025-11-14 01:02:00 +09:00
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_GUARD] %s: suspicious ptr=%p cls=%d\n",
|
|
|
|
|
where, base, class_idx);
|
2025-11-10 23:41:53 +09:00
|
|
|
abort();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#else
|
2025-11-14 01:02:00 +09:00
|
|
|
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where)
|
|
|
|
|
{
|
|
|
|
|
(void)class_idx; (void)base; (void)where;
|
|
|
|
|
}
|
2025-11-10 23:41:53 +09:00
|
|
|
#endif
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Normalize helper: callers are required to pass BASE already.
|
|
|
|
|
// Kept as a no-op for documentation / future hardening.
|
|
|
|
|
static inline void* tls_sll_normalize_base(int class_idx, void* node)
|
|
|
|
|
{
|
2025-11-21 23:00:24 +09:00
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
|
|
|
|
if (node && class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
|
|
|
|
|
extern const size_t g_tiny_class_sizes[];
|
|
|
|
|
size_t stride = g_tiny_class_sizes[class_idx];
|
|
|
|
|
if (__builtin_expect(stride != 0, 1)) {
|
|
|
|
|
uintptr_t delta = (uintptr_t)node % stride;
|
|
|
|
|
if (__builtin_expect(delta == 1, 0)) {
|
|
|
|
|
// USER pointer passed in; normalize to BASE (= user-1) to avoid offset-1 writes.
|
|
|
|
|
void* base = (uint8_t*)node - 1;
|
|
|
|
|
static _Atomic uint32_t g_tls_sll_norm_userptr = 0;
|
|
|
|
|
uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_norm_userptr, 1, memory_order_relaxed);
|
|
|
|
|
if (n < 8) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_NORMALIZE_USERPTR] cls=%d node=%p -> base=%p stride=%zu\n",
|
|
|
|
|
class_idx, node, base, stride);
|
|
|
|
|
}
|
|
|
|
|
return base;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#else
|
2025-11-10 23:41:53 +09:00
|
|
|
(void)class_idx;
|
2025-11-21 23:00:24 +09:00
|
|
|
#endif
|
2025-11-10 23:41:53 +09:00
|
|
|
return node;
|
|
|
|
|
}
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-21 23:00:24 +09:00
|
|
|
// Narrow dump around TLS SLL array when corruption is detected (env-gated)
|
|
|
|
|
static inline void tls_sll_dump_tls_window(int class_idx, const char* stage)
|
|
|
|
|
{
|
|
|
|
|
static _Atomic uint32_t g_tls_sll_diag_shots = 0;
|
|
|
|
|
static int s_diag_enable = -1;
|
|
|
|
|
if (__builtin_expect(s_diag_enable == -1, 0)) {
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
|
|
|
|
|
s_diag_enable = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
}
|
|
|
|
|
if (!__builtin_expect(s_diag_enable, 0)) return;
|
|
|
|
|
|
|
|
|
|
uint32_t shot = atomic_fetch_add_explicit(&g_tls_sll_diag_shots, 1, memory_order_relaxed);
|
|
|
|
|
if (shot >= 2) return; // limit noise
|
|
|
|
|
|
|
|
|
|
if (shot == 0) {
|
|
|
|
|
// Map TLS layout once to confirm index→address mapping during triage
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_ADDRMAP] before=%p sll=%p after=%p entry_size=%zu\n",
|
|
|
|
|
(void*)&g_tls_canary_before_sll,
|
|
|
|
|
(void*)g_tls_sll,
|
|
|
|
|
(void*)&g_tls_canary_after_sll,
|
|
|
|
|
sizeof(TinyTLSSLL));
|
|
|
|
|
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
|
|
|
|
|
fprintf(stderr, " C%d: head@%p count@%p\n",
|
|
|
|
|
c,
|
|
|
|
|
(void*)&g_tls_sll[c].head,
|
|
|
|
|
(void*)&g_tls_sll[c].count);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_INVALID_POP_DIAG] shot=%u stage=%s cls=%d head=%p count=%u last_push=%p last_writer=%s\n",
|
|
|
|
|
shot + 1,
|
|
|
|
|
stage ? stage : "(null)",
|
|
|
|
|
class_idx,
|
|
|
|
|
g_tls_sll[class_idx].head,
|
|
|
|
|
g_tls_sll[class_idx].count,
|
|
|
|
|
s_tls_sll_last_push[class_idx],
|
|
|
|
|
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
|
|
|
|
|
fprintf(stderr, " tls_sll snapshot (head/count):");
|
|
|
|
|
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
|
|
|
|
|
fprintf(stderr, " C%d:%p/%u", c, g_tls_sll[c].head, g_tls_sll[c].count);
|
|
|
|
|
}
|
|
|
|
|
fprintf(stderr, " canary_before=%#llx canary_after=%#llx\n",
|
|
|
|
|
(unsigned long long)g_tls_canary_before_sll,
|
|
|
|
|
(unsigned long long)g_tls_canary_after_sll);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void tls_sll_record_writer(int class_idx, const char* who)
|
|
|
|
|
{
|
|
|
|
|
if (__builtin_expect(class_idx >= 0 && class_idx < TINY_NUM_CLASSES, 1)) {
|
|
|
|
|
g_tls_sll_last_writer[class_idx] = who;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline int tls_sll_head_valid(void* head)
|
|
|
|
|
{
|
|
|
|
|
uintptr_t a = (uintptr_t)head;
|
|
|
|
|
return (a >= 4096 && a <= 0x00007fffffffffffULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void tls_sll_log_hdr_mismatch(int class_idx, void* base, uint8_t got, uint8_t expect, const char* stage)
|
|
|
|
|
{
|
|
|
|
|
static _Atomic uint32_t g_hdr_mismatch_log = 0;
|
|
|
|
|
uint32_t n = atomic_fetch_add_explicit(&g_hdr_mismatch_log, 1, memory_order_relaxed);
|
|
|
|
|
if (n < 16) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_HDR_MISMATCH] stage=%s cls=%d base=%p got=0x%02x expect=0x%02x\n",
|
|
|
|
|
stage ? stage : "(null)",
|
|
|
|
|
class_idx,
|
|
|
|
|
base,
|
|
|
|
|
got,
|
|
|
|
|
expect);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline void tls_sll_diag_next(int class_idx, void* base, void* next, const char* stage)
|
|
|
|
|
{
|
|
|
|
|
static int s_diag_enable = -1;
|
|
|
|
|
if (__builtin_expect(s_diag_enable == -1, 0)) {
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
|
|
|
|
|
s_diag_enable = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
}
|
|
|
|
|
if (!__builtin_expect(s_diag_enable, 0)) return;
|
|
|
|
|
|
|
|
|
|
// Narrow to target classes to preserve early shots
|
|
|
|
|
if (class_idx != 4 && class_idx != 6 && class_idx != 7) return;
|
|
|
|
|
|
|
|
|
|
int in_range = tls_sll_head_valid(next);
|
|
|
|
|
if (in_range) {
|
|
|
|
|
// Range check (abort on clearly bad pointers to catch first offender)
|
|
|
|
|
validate_ptr_range(next, "tls_sll_pop_next_diag");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
SuperSlab* ss = hak_super_lookup(next);
|
|
|
|
|
int slab_idx = ss ? slab_index_for(ss, next) : -1;
|
|
|
|
|
TinySlabMeta* meta = (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) ? &ss->slabs[slab_idx] : NULL;
|
|
|
|
|
int meta_cls = meta ? (int)meta->class_idx : -1;
|
|
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
|
|
|
|
int hdr_cls = next ? tiny_region_id_read_header((uint8_t*)next + 1) : -1;
|
|
|
|
|
#else
|
|
|
|
|
int hdr_cls = -1;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
static _Atomic uint32_t g_next_diag_once = 0;
|
|
|
|
|
uint32_t shot = atomic_fetch_add_explicit(&g_next_diag_once, 1, memory_order_relaxed);
|
|
|
|
|
if (shot < 12) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_POP_NEXT_DIAG] shot=%u stage=%s cls=%d base=%p next=%p hdr_cls=%d meta_cls=%d slab=%d ss=%p\n",
|
|
|
|
|
shot + 1,
|
|
|
|
|
stage ? stage : "(null)",
|
|
|
|
|
class_idx,
|
|
|
|
|
base,
|
|
|
|
|
next,
|
|
|
|
|
hdr_cls,
|
|
|
|
|
meta_cls,
|
|
|
|
|
slab_idx,
|
|
|
|
|
(void*)ss);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-10 16:48:20 +09:00
|
|
|
// ========== Push ==========
|
2025-11-10 17:02:25 +09:00
|
|
|
//
|
2025-11-14 01:02:00 +09:00
|
|
|
// Push BASE pointer into TLS SLL for given class.
|
|
|
|
|
// Returns true on success, false if capacity full or input invalid.
|
2025-11-22 11:30:46 +09:00
|
|
|
//
|
|
|
|
|
// Implementation function with callsite tracking (where).
|
|
|
|
|
// Use tls_sll_push() macro instead of calling directly.
|
2025-11-14 01:02:00 +09:00
|
|
|
|
2025-11-22 11:30:46 +09:00
|
|
|
static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity, const char* where)
|
2025-11-14 01:02:00 +09:00
|
|
|
{
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
|
|
|
|
|
|
2025-11-14 01:05:30 +09:00
|
|
|
// Class mask gate (narrow triage): if disallowed, reject push
|
|
|
|
|
if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Capacity semantics:
|
|
|
|
|
// - capacity == 0 → disabled (reject)
|
|
|
|
|
// - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
|
|
|
|
|
if (capacity == 0) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
const uint32_t kCapacityHardMax = (1u << 20);
|
|
|
|
|
const int unlimited = (capacity > kCapacityHardMax);
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
if (!ptr) {
|
|
|
|
|
return false;
|
2025-11-10 16:48:20 +09:00
|
|
|
}
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Base pointer only (callers must pass BASE; this is a no-op by design).
|
|
|
|
|
ptr = tls_sll_normalize_base(class_idx, ptr);
|
|
|
|
|
|
2025-11-21 23:00:24 +09:00
|
|
|
// Detect meta/class mismatch on push (first few only).
|
|
|
|
|
do {
|
|
|
|
|
static _Atomic uint32_t g_tls_sll_push_meta_mis = 0;
|
|
|
|
|
struct SuperSlab* ss = hak_super_lookup(ptr);
|
|
|
|
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
|
|
|
|
int sidx = slab_index_for(ss, ptr);
|
|
|
|
|
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
|
|
|
|
uint8_t meta_cls = ss->slabs[sidx].class_idx;
|
|
|
|
|
if (meta_cls < TINY_NUM_CLASSES && meta_cls != (uint8_t)class_idx) {
|
|
|
|
|
uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_push_meta_mis, 1, memory_order_relaxed);
|
|
|
|
|
if (n < 4) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_PUSH_META_MISMATCH] cls=%d meta_cls=%u base=%p slab_idx=%d ss=%p\n",
|
|
|
|
|
class_idx, (unsigned)meta_cls, ptr, sidx, (void*)ss);
|
|
|
|
|
void* bt[8];
|
|
|
|
|
int frames = backtrace(bt, 8);
|
|
|
|
|
backtrace_symbols_fd(bt, frames, fileno(stderr));
|
|
|
|
|
}
|
|
|
|
|
fflush(stderr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} while (0);
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
// Minimal range guard before we touch memory.
|
|
|
|
|
if (!validate_ptr_range(ptr, "tls_sll_push_base")) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_PUSH] FATAL invalid BASE ptr cls=%d base=%p\n",
|
|
|
|
|
class_idx, ptr);
|
|
|
|
|
abort();
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
}
|
2025-11-21 23:00:24 +09:00
|
|
|
#else
|
|
|
|
|
// Release: drop malformed ptrs but keep running.
|
|
|
|
|
uintptr_t ptr_addr = (uintptr_t)ptr;
|
|
|
|
|
if (ptr_addr < 4096 || ptr_addr > 0x00007fffffffffffULL) {
|
|
|
|
|
extern _Atomic uint64_t g_tls_sll_invalid_push[];
|
|
|
|
|
uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_push[class_idx], 1, memory_order_relaxed);
|
|
|
|
|
static __thread uint8_t s_log_limit_push[TINY_NUM_CLASSES] = {0};
|
|
|
|
|
if (s_log_limit_push[class_idx] < 4) {
|
|
|
|
|
fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p dropped count=%llu\n",
|
|
|
|
|
class_idx, ptr, (unsigned long long)cnt + 1);
|
|
|
|
|
s_log_limit_push[class_idx]++;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
#endif
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Capacity check BEFORE any writes.
|
2025-11-20 07:32:30 +09:00
|
|
|
uint32_t cur = g_tls_sll[class_idx].count;
|
2025-11-14 01:02:00 +09:00
|
|
|
if (!unlimited && cur >= capacity) {
|
|
|
|
|
return false;
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
}
|
|
|
|
|
|
2025-11-10 18:04:08 +09:00
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
2025-11-22 02:15:34 +09:00
|
|
|
// Header handling for header classes (class 1-6 only, NOT 0 or 7).
|
|
|
|
|
// C0, C7 use offset=0, so next pointer is at base[0] and MUST NOT restore header.
|
2025-11-14 01:29:55 +09:00
|
|
|
// Safe mode (HAKMEM_TINY_SLL_SAFEHEADER=1): never overwrite header; reject on magic mismatch.
|
|
|
|
|
// Default mode: restore expected header.
|
2025-11-22 02:15:34 +09:00
|
|
|
if (class_idx != 0 && class_idx != 7) {
|
2025-11-14 01:29:55 +09:00
|
|
|
static int g_sll_safehdr = -1;
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
static int g_sll_ring_en = -1; // optional ring trace for TLS-SLL anomalies
|
2025-11-14 01:29:55 +09:00
|
|
|
if (__builtin_expect(g_sll_safehdr == -1, 0)) {
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_SLL_SAFEHEADER");
|
|
|
|
|
g_sll_safehdr = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
}
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
if (__builtin_expect(g_sll_ring_en == -1, 0)) {
|
|
|
|
|
const char* r = getenv("HAKMEM_TINY_SLL_RING");
|
|
|
|
|
g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
|
|
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
uint8_t* b = (uint8_t*)ptr;
|
|
|
|
|
uint8_t expected = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
2025-11-21 23:00:24 +09:00
|
|
|
uint8_t got_pre = *b;
|
|
|
|
|
if (__builtin_expect(got_pre != expected, 0)) {
|
|
|
|
|
tls_sll_log_hdr_mismatch(class_idx, ptr, got_pre, expected, "push_preheader");
|
|
|
|
|
}
|
2025-11-14 01:29:55 +09:00
|
|
|
if (g_sll_safehdr) {
|
|
|
|
|
uint8_t got = *b;
|
|
|
|
|
if ((got & 0xF0u) != HEADER_MAGIC) {
|
|
|
|
|
// Reject push silently (fall back to slow path at caller)
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
if (__builtin_expect(g_sll_ring_en, 0)) {
|
|
|
|
|
// aux encodes: high 8 bits = got, low 8 bits = expected
|
|
|
|
|
uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expected;
|
|
|
|
|
tiny_debug_ring_record(0x7F10 /*TLS_SLL_REJECT*/, (uint16_t)class_idx, ptr, aux);
|
|
|
|
|
}
|
2025-11-14 01:29:55 +09:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
PTR_TRACK_TLS_PUSH(ptr, class_idx);
|
|
|
|
|
PTR_TRACK_HEADER_WRITE(ptr, expected);
|
|
|
|
|
*b = expected;
|
|
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
}
|
2025-11-10 18:04:08 +09:00
|
|
|
#endif
|
2025-11-14 01:02:00 +09:00
|
|
|
|
2025-11-10 23:41:53 +09:00
|
|
|
tls_sll_debug_guard(class_idx, ptr, "push");
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-14 01:02:00 +09:00
|
|
|
// Optional double-free detection: scan a bounded prefix of the list.
|
2025-11-22 08:43:18 +09:00
|
|
|
// Increased from 64 to 256 to catch orphaned blocks deeper in the chain.
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
{
|
2025-11-20 07:32:30 +09:00
|
|
|
void* scan = g_tls_sll[class_idx].head;
|
2025-11-14 01:02:00 +09:00
|
|
|
uint32_t scanned = 0;
|
2025-11-22 08:43:18 +09:00
|
|
|
const uint32_t limit = (g_tls_sll[class_idx].count < 256)
|
2025-11-20 07:32:30 +09:00
|
|
|
? g_tls_sll[class_idx].count
|
2025-11-22 08:43:18 +09:00
|
|
|
: 256;
|
2025-11-14 01:02:00 +09:00
|
|
|
while (scan && scanned < limit) {
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
if (scan == ptr) {
|
2025-11-14 01:02:00 +09:00
|
|
|
fprintf(stderr,
|
2025-11-22 11:30:46 +09:00
|
|
|
"[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_push_from=%s last_pop_from=%s last_writer=%s where=%s\n",
|
2025-11-22 08:43:18 +09:00
|
|
|
class_idx,
|
|
|
|
|
ptr,
|
|
|
|
|
g_tls_sll[class_idx].head,
|
|
|
|
|
g_tls_sll[class_idx].count,
|
|
|
|
|
scanned,
|
|
|
|
|
s_tls_sll_last_push[class_idx],
|
2025-11-22 11:30:46 +09:00
|
|
|
s_tls_sll_last_push_from[class_idx] ? s_tls_sll_last_push_from[class_idx] : "(null)",
|
|
|
|
|
s_tls_sll_last_pop_from[class_idx] ? s_tls_sll_last_pop_from[class_idx] : "(null)",
|
|
|
|
|
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
|
|
|
|
|
where ? where : "(null)");
|
2025-11-22 08:43:18 +09:00
|
|
|
ptr_trace_dump_now("tls_sll_dup");
|
2025-11-27 05:57:22 +09:00
|
|
|
// ABORT to get backtrace showing exact double-free location
|
|
|
|
|
abort();
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
void* next;
|
|
|
|
|
PTR_NEXT_READ("tls_sll_scan", class_idx, scan, 0, next);
|
|
|
|
|
scan = next;
|
|
|
|
|
scanned++;
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Link new node to current head via Box API (offset is handled inside tiny_nextptr).
|
2025-11-20 07:32:30 +09:00
|
|
|
PTR_NEXT_WRITE("tls_push", class_idx, ptr, 0, g_tls_sll[class_idx].head);
|
|
|
|
|
g_tls_sll[class_idx].head = ptr;
|
2025-11-21 23:00:24 +09:00
|
|
|
tls_sll_record_writer(class_idx, "push");
|
2025-11-20 07:32:30 +09:00
|
|
|
g_tls_sll[class_idx].count = cur + 1;
|
2025-11-21 23:00:24 +09:00
|
|
|
s_tls_sll_last_push[class_idx] = ptr;
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-22 11:30:46 +09:00
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
// Record callsite for debugging (debug-only)
|
|
|
|
|
s_tls_sll_last_push_from[class_idx] = where;
|
|
|
|
|
#else
|
|
|
|
|
(void)where; // Suppress unused warning in release
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-10 16:48:20 +09:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ========== Pop ==========
|
|
|
|
|
//
|
2025-11-14 01:02:00 +09:00
|
|
|
// Pop BASE pointer from TLS SLL.
|
|
|
|
|
// Returns true on success and stores BASE into *out.
|
2025-11-22 11:30:46 +09:00
|
|
|
//
|
|
|
|
|
// Implementation function with callsite tracking (where).
|
|
|
|
|
// Use tls_sll_pop() macro instead of calling directly.
|
2025-11-14 01:02:00 +09:00
|
|
|
|
2025-11-22 11:30:46 +09:00
|
|
|
static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where)
|
2025-11-14 01:02:00 +09:00
|
|
|
{
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
|
2025-11-14 01:05:30 +09:00
|
|
|
// Class mask gate: if disallowed, behave as empty
|
|
|
|
|
if (__builtin_expect(((g_tls_sll_class_mask & (1u << class_idx)) == 0), 0)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
|
|
|
|
|
2025-11-20 07:32:30 +09:00
|
|
|
void* base = g_tls_sll[class_idx].head;
|
2025-11-10 17:02:25 +09:00
|
|
|
if (!base) {
|
2025-11-14 01:02:00 +09:00
|
|
|
return false;
|
2025-11-10 16:48:20 +09:00
|
|
|
}
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Sentinel guard: remote sentinel must never be in TLS SLL.
|
2025-11-13 05:43:31 +09:00
|
|
|
if (__builtin_expect((uintptr_t)base == TINY_REMOTE_SENTINEL, 0)) {
|
2025-11-20 07:32:30 +09:00
|
|
|
g_tls_sll[class_idx].head = NULL;
|
|
|
|
|
g_tls_sll[class_idx].count = 0;
|
2025-11-21 23:00:24 +09:00
|
|
|
tls_sll_record_writer(class_idx, "pop_sentinel_reset");
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-14 01:02:00 +09:00
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_POP] Remote sentinel detected at head; SLL reset (cls=%d)\n",
|
|
|
|
|
class_idx);
|
2025-11-10 18:04:08 +09:00
|
|
|
#endif
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
{
|
|
|
|
|
static int g_sll_ring_en = -1;
|
|
|
|
|
if (__builtin_expect(g_sll_ring_en == -1, 0)) {
|
|
|
|
|
const char* r = getenv("HAKMEM_TINY_SLL_RING");
|
|
|
|
|
g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
|
|
|
|
|
}
|
|
|
|
|
if (__builtin_expect(g_sll_ring_en, 0)) {
|
|
|
|
|
tiny_debug_ring_record(0x7F11 /*TLS_SLL_SENTINEL*/, (uint16_t)class_idx, base, 0);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
return false;
|
|
|
|
|
}
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-14 01:02:00 +09:00
|
|
|
if (!validate_ptr_range(base, "tls_sll_pop_base")) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_POP] FATAL invalid BASE ptr cls=%d base=%p\n",
|
|
|
|
|
class_idx, base);
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
abort();
|
|
|
|
|
}
|
2025-11-21 23:00:24 +09:00
|
|
|
#else
|
|
|
|
|
// Fail-fast even in release: drop malformed TLS head to avoid SEGV on bad base.
|
|
|
|
|
uintptr_t base_addr = (uintptr_t)base;
|
|
|
|
|
if (base_addr < 4096 || base_addr > 0x00007fffffffffffULL) {
|
|
|
|
|
extern _Atomic uint64_t g_tls_sll_invalid_head[];
|
|
|
|
|
uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_head[class_idx], 1, memory_order_relaxed);
|
|
|
|
|
static __thread uint8_t s_log_limit[TINY_NUM_CLASSES] = {0};
|
|
|
|
|
if (s_log_limit[class_idx] < 4) {
|
|
|
|
|
fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d head=%p dropped count=%llu\n",
|
|
|
|
|
class_idx, base, (unsigned long long)cnt + 1);
|
|
|
|
|
s_log_limit[class_idx]++;
|
|
|
|
|
}
|
|
|
|
|
// Help triage: show last successful push base for this thread/class
|
|
|
|
|
if (s_tls_sll_last_push[class_idx] && s_log_limit[class_idx] <= 4) {
|
|
|
|
|
fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d last_push=%p\n",
|
|
|
|
|
class_idx, s_tls_sll_last_push[class_idx]);
|
|
|
|
|
}
|
|
|
|
|
tls_sll_dump_tls_window(class_idx, "head_range");
|
|
|
|
|
g_tls_sll[class_idx].head = NULL;
|
|
|
|
|
g_tls_sll[class_idx].count = 0;
|
|
|
|
|
tls_sll_record_writer(class_idx, "pop_invalid_head");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
#endif
|
|
|
|
|
|
2025-11-21 23:00:24 +09:00
|
|
|
// Optional high-frequency canary check for target classes (e.g., 4/6)
|
|
|
|
|
static int s_canary_fast = -1;
|
|
|
|
|
if (__builtin_expect(s_canary_fast == -1, 0)) {
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_SLL_CANARY_FAST");
|
|
|
|
|
s_canary_fast = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
}
|
|
|
|
|
if (__builtin_expect(s_canary_fast && (class_idx == 4 || class_idx == 6), 0)) {
|
|
|
|
|
extern _Atomic uint64_t g_tls_sll_pop_counter[];
|
|
|
|
|
uint64_t pc = atomic_fetch_add_explicit(&g_tls_sll_pop_counter[class_idx], 1, memory_order_relaxed) + 1;
|
|
|
|
|
periodic_canary_check(pc, class_idx == 4 ? "tls_sll_pop_cls4" : "tls_sll_pop_cls6");
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-10 23:41:53 +09:00
|
|
|
tls_sll_debug_guard(class_idx, base, "pop");
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
|
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
2025-11-14 01:02:00 +09:00
|
|
|
// Header validation for header-classes (class != 0,7).
|
2025-11-13 01:45:30 +09:00
|
|
|
if (class_idx != 0 && class_idx != 7) {
|
2025-11-14 01:02:00 +09:00
|
|
|
uint8_t got = *(uint8_t*)base;
|
|
|
|
|
uint8_t expect = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
|
|
|
|
PTR_TRACK_TLS_POP(base, class_idx);
|
|
|
|
|
PTR_TRACK_HEADER_READ(base, got);
|
|
|
|
|
if (__builtin_expect(got != expect, 0)) {
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_POP] CORRUPTED HEADER cls=%d base=%p got=0x%02x expect=0x%02x\n",
|
|
|
|
|
class_idx, base, got, expect);
|
|
|
|
|
ptr_trace_dump_now("header_corruption");
|
|
|
|
|
abort();
|
|
|
|
|
#else
|
|
|
|
|
// In release, fail-safe: drop list.
|
2025-11-21 23:00:24 +09:00
|
|
|
// PERF DEBUG: Count header corruption resets
|
|
|
|
|
static _Atomic uint64_t g_hdr_reset_count = 0;
|
|
|
|
|
uint64_t cnt = atomic_fetch_add_explicit(&g_hdr_reset_count, 1, memory_order_relaxed);
|
|
|
|
|
if (cnt % 10000 == 0) {
|
|
|
|
|
fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
|
|
|
|
|
class_idx, base, got, expect, (unsigned long long)cnt);
|
|
|
|
|
}
|
2025-11-20 07:32:30 +09:00
|
|
|
g_tls_sll[class_idx].head = NULL;
|
|
|
|
|
g_tls_sll[class_idx].count = 0;
|
2025-11-21 23:00:24 +09:00
|
|
|
tls_sll_record_writer(class_idx, "header_reset");
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
{
|
|
|
|
|
static int g_sll_ring_en = -1;
|
|
|
|
|
if (__builtin_expect(g_sll_ring_en == -1, 0)) {
|
|
|
|
|
const char* r = getenv("HAKMEM_TINY_SLL_RING");
|
|
|
|
|
g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
|
|
|
|
|
}
|
|
|
|
|
if (__builtin_expect(g_sll_ring_en, 0)) {
|
|
|
|
|
// aux encodes: high 8 bits = got, low 8 bits = expect
|
|
|
|
|
uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expect;
|
|
|
|
|
tiny_debug_ring_record(0x7F12 /*TLS_SLL_HDR_CORRUPT*/, (uint16_t)class_idx, base, aux);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
return false;
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
#endif
|
2025-11-14 01:02:00 +09:00
|
|
|
}
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
#endif
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Read next via Box API.
|
|
|
|
|
void* next;
|
|
|
|
|
PTR_NEXT_READ("tls_pop", class_idx, base, 0, next);
|
2025-11-21 23:00:24 +09:00
|
|
|
tls_sll_diag_next(class_idx, base, next, "pop_next");
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-14 01:02:00 +09:00
|
|
|
if (next && !validate_ptr_range(next, "tls_sll_pop_next")) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_POP] FATAL invalid next ptr cls=%d base=%p next=%p\n",
|
|
|
|
|
class_idx, base, next);
|
|
|
|
|
ptr_trace_dump_now("next_corruption");
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
abort();
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-20 07:32:30 +09:00
|
|
|
g_tls_sll[class_idx].head = next;
|
2025-11-21 23:00:24 +09:00
|
|
|
tls_sll_record_writer(class_idx, "pop");
|
|
|
|
|
if ((class_idx == 4 || class_idx == 6) && next && !tls_sll_head_valid(next)) {
|
|
|
|
|
fprintf(stderr, "[TLS_SLL_POP_POST_INVALID] cls=%d next=%p last_writer=%s\n",
|
|
|
|
|
class_idx,
|
|
|
|
|
next,
|
|
|
|
|
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
|
|
|
|
|
tls_sll_dump_tls_window(class_idx, "pop_post");
|
|
|
|
|
g_tls_sll[class_idx].head = NULL;
|
|
|
|
|
g_tls_sll[class_idx].count = 0;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2025-11-20 07:32:30 +09:00
|
|
|
if (g_tls_sll[class_idx].count > 0) {
|
|
|
|
|
g_tls_sll[class_idx].count--;
|
2025-11-10 16:48:20 +09:00
|
|
|
}
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Clear next inside popped node to avoid stale-chain issues.
|
|
|
|
|
tiny_next_write(class_idx, base, NULL);
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
2025-11-22 11:30:46 +09:00
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
// Record callsite for debugging (debug-only)
|
|
|
|
|
s_tls_sll_last_pop_from[class_idx] = where;
|
|
|
|
|
#else
|
|
|
|
|
(void)where; // Suppress unused warning in release
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
*out = base;
|
2025-11-10 16:48:20 +09:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ========== Splice ==========
|
2025-11-10 17:02:25 +09:00
|
|
|
//
|
2025-11-14 01:02:00 +09:00
|
|
|
// Splice a pre-linked chain of BASE pointers into TLS SLL head.
|
|
|
|
|
// chain_head is BASE; links are via Box API-compatible next layout.
|
|
|
|
|
// Returns number of nodes actually moved (<= capacity remaining).
|
|
|
|
|
|
|
|
|
|
static inline uint32_t tls_sll_splice(int class_idx,
|
|
|
|
|
void* chain_head,
|
|
|
|
|
uint32_t count,
|
|
|
|
|
uint32_t capacity)
|
|
|
|
|
{
|
|
|
|
|
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_splice");
|
|
|
|
|
|
|
|
|
|
if (!chain_head || count == 0 || capacity == 0) {
|
|
|
|
|
return 0;
|
2025-11-10 16:48:20 +09:00
|
|
|
}
|
2025-11-13 01:45:30 +09:00
|
|
|
|
2025-11-20 07:32:30 +09:00
|
|
|
uint32_t cur = g_tls_sll[class_idx].count;
|
2025-11-14 01:02:00 +09:00
|
|
|
if (cur >= capacity) {
|
|
|
|
|
return 0;
|
2025-11-13 01:45:30 +09:00
|
|
|
}
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
uint32_t room = capacity - cur;
|
|
|
|
|
uint32_t to_move = (count < room) ? count : room;
|
|
|
|
|
|
|
|
|
|
// Traverse chain up to to_move, validate, and find tail.
|
|
|
|
|
void* tail = chain_head;
|
|
|
|
|
uint32_t moved = 1;
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
tls_sll_debug_guard(class_idx, chain_head, "splice_head");
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-10 18:04:08 +09:00
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
2025-11-14 01:02:00 +09:00
|
|
|
// Restore header defensively on each node we touch.
|
2025-11-11 10:00:36 +09:00
|
|
|
{
|
2025-11-14 01:02:00 +09:00
|
|
|
uint8_t* b = (uint8_t*)chain_head;
|
|
|
|
|
uint8_t expected = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
|
|
|
|
*b = expected;
|
2025-11-11 10:00:36 +09:00
|
|
|
}
|
2025-11-10 18:04:08 +09:00
|
|
|
#endif
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
while (moved < to_move) {
|
|
|
|
|
tls_sll_debug_guard(class_idx, tail, "splice_traverse");
|
|
|
|
|
|
|
|
|
|
void* next;
|
|
|
|
|
PTR_NEXT_READ("tls_splice_trav", class_idx, tail, 0, next);
|
2025-11-21 23:00:24 +09:00
|
|
|
if (next && !tls_sll_head_valid(next)) {
|
|
|
|
|
static _Atomic uint32_t g_splice_diag = 0;
|
|
|
|
|
uint32_t shot = atomic_fetch_add_explicit(&g_splice_diag, 1, memory_order_relaxed);
|
|
|
|
|
if (shot < 8) {
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"[TLS_SLL_SPLICE_INVALID_NEXT] cls=%d head=%p tail=%p next=%p moved=%u/%u\n",
|
|
|
|
|
class_idx, chain_head, tail, next, moved, to_move);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-11-14 01:02:00 +09:00
|
|
|
|
2025-11-10 16:48:20 +09:00
|
|
|
if (!next) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
|
|
|
|
{
|
|
|
|
|
uint8_t* b = (uint8_t*)next;
|
|
|
|
|
uint8_t expected = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
|
|
|
|
|
*b = expected;
|
|
|
|
|
}
|
2025-11-10 23:41:53 +09:00
|
|
|
#endif
|
Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.
📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.
🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
* core/tiny_alloc_fast.inc.h (3 fixes)
* core/hakmem_tiny_refill.inc.h (2 fixes)
* core/hakmem_tiny_fastcache.inc.h (1 fix)
- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination
🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO
📋 FIXES SUMMARY:
Fix #1-8: Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10: USER pointer auto-fix (later disabled)
Fix #12: Validation system (caught corruption at call 14209)
Fix #13: Bump window header writes
Fix #14: Splice defense-in-depth
Fix #15: USER pointer detection (debugging tool)
Fix #16: Double conversion fix (FINAL SOLUTION) ✅
🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Multiple investigation approaches (Task/chatgpt-san collaboration)
📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
tail = next;
|
|
|
|
|
moved++;
|
|
|
|
|
}
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
// Link tail to existing head and install new head.
|
|
|
|
|
tls_sll_debug_guard(class_idx, tail, "splice_tail");
|
2025-11-20 07:32:30 +09:00
|
|
|
PTR_NEXT_WRITE("tls_splice_link", class_idx, tail, 0, g_tls_sll[class_idx].head);
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-20 07:32:30 +09:00
|
|
|
g_tls_sll[class_idx].head = chain_head;
|
2025-11-21 23:00:24 +09:00
|
|
|
tls_sll_record_writer(class_idx, "splice");
|
2025-11-20 07:32:30 +09:00
|
|
|
g_tls_sll[class_idx].count = cur + moved;
|
2025-11-10 16:48:20 +09:00
|
|
|
|
2025-11-14 01:02:00 +09:00
|
|
|
return moved;
|
2025-11-10 16:48:20 +09:00
|
|
|
}
|
|
|
|
|
|
2025-11-22 11:30:46 +09:00
|
|
|
// ========== Macro Wrappers ==========
|
|
|
|
|
//
|
|
|
|
|
// Box Theory: Callers use tls_sll_push/pop() macros which auto-insert __func__.
|
|
|
|
|
// No changes required to 20+ call sites.
|
|
|
|
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
# define tls_sll_push(cls, ptr, cap) \
|
|
|
|
|
tls_sll_push_impl((cls), (ptr), (cap), __func__)
|
|
|
|
|
# define tls_sll_pop(cls, out) \
|
|
|
|
|
tls_sll_pop_impl((cls), (out), __func__)
|
|
|
|
|
#else
|
|
|
|
|
# define tls_sll_push(cls, ptr, cap) \
|
|
|
|
|
tls_sll_push_impl((cls), (ptr), (cap), NULL)
|
|
|
|
|
# define tls_sll_pop(cls, out) \
|
|
|
|
|
tls_sll_pop_impl((cls), (out), NULL)
|
|
|
|
|
#endif
|
|
|
|
|
|
2025-11-10 16:48:20 +09:00
|
|
|
#endif // TLS_SLL_BOX_H
|