Files
hakmem/core/hakmem_tiny_integrity.h
Moe Charm (CI) 84dbd97fe9 Fix #16: Resolve double BASE→USER conversion causing header corruption
🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header which performed ANOTHER BASE→USER
conversion, resulting in double offset (BASE+2) and header written at
wrong location.

📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.

🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
  * core/tiny_alloc_fast.inc.h (3 fixes)
  * core/hakmem_tiny_refill.inc.h (2 fixes)
  * core/hakmem_tiny_fastcache.inc.h (1 fix)

- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination

🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (42,123,456,789,999,314,271,161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO

📋 FIXES SUMMARY:
Fix #1-8:   Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10:  USER pointer auto-fix (later disabled)
Fix #12:    Validation system (caught corruption at call 14209)
Fix #13:    Bump window header writes
Fix #14:    Splice defense-in-depth
Fix #15:    USER pointer detection (debugging tool)
Fix #16:    Double conversion fix (FINAL SOLUTION) 

🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Combining multiple investigation approaches pays off (Task/chatgpt-san collaboration)

📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-11-12 10:33:57 +09:00

212 lines
9.0 KiB
C

#ifndef HAKMEM_TINY_INTEGRITY_H
#define HAKMEM_TINY_INTEGRITY_H
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "hakmem_tiny.h"
// ============================================================================
// PRIORITY 1: TLS Array Bounds Checks
// ============================================================================
// Bounds-check a size-class index before it is used to index a TLS array.
//
//   class_idx - integer expression to check. It is evaluated exactly once and
//               widened to long long, so the "< 0" test and the format
//               specifier are valid for any integer type (signed or unsigned).
//   label     - string printed in brackets at the start of the diagnostic.
//
// When the index is outside [0, TINY_NUM_CLASSES) the macro prints the index
// and the call site (__FILE__/__LINE__) to stderr, trips assert(), and then
// calls abort() so the process still stops when asserts are compiled out.
#define HAK_CHECK_CLASS_IDX(class_idx, label) do { \
    const long long hak_check_idx_ = (long long)(class_idx); \
    if (__builtin_expect(hak_check_idx_ < 0 || \
                         hak_check_idx_ >= (long long)(TINY_NUM_CLASSES), 0)) { \
        fprintf(stderr, "[%s] FATAL: class_idx=%lld out of bounds [0,%d) at %s:%d\n", \
                (label), hak_check_idx_, (int)(TINY_NUM_CLASSES), __FILE__, __LINE__); \
        fflush(stderr); \
        assert(0 && "TLS array index out of bounds"); \
        abort(); \
    } \
} while(0)
// ============================================================================
// PRIORITY 2: Freelist Integrity Checks
// ============================================================================
// Validate that a freelist "next" link points at a block boundary inside its slab.
//
//   ptr        - block whose link is being checked (used only in diagnostics)
//   next       - link value to validate; NULL marks the end of the list
//   slab_base  - address of the first block in the slab
//   stride     - distance in bytes between consecutive blocks
//   class_idx  - size class (used only in diagnostics)
//   num_blocks - number of blocks in the slab
//   location   - caller-supplied tag printed in diagnostics
//
// Returns 1 when next is NULL or a stride-aligned address inside
// [slab_base, slab_base + num_blocks * stride). Otherwise it logs to stderr and
// trips assert(0); if asserts are compiled out it returns 0 instead.
static inline int validate_freelist_next(void* ptr, void* next,
                                         void* slab_base, size_t stride,
                                         uint8_t class_idx,
                                         size_t num_blocks,
                                         const char* location) {
    if (next == NULL) return 1;  // NULL is valid (end of list)

    // Compare addresses as integers: relational operators on pointers that may
    // not point into the same object have undefined behavior.
    const uintptr_t base_addr = (uintptr_t)slab_base;
    const uintptr_t end_addr  = base_addr + (uintptr_t)(num_blocks * stride);
    const uintptr_t next_addr = (uintptr_t)next;

    if (next_addr < base_addr || next_addr >= end_addr) {
        const ptrdiff_t distance = (next_addr < base_addr)
            ? (ptrdiff_t)(base_addr - next_addr)
            : (ptrdiff_t)(next_addr - end_addr);
        fprintf(stderr, "[FREELIST_CORRUPT] %s: ptr=%p next=%p slab=[%p,%p) class=%d stride=%zu\n",
                location, ptr, next, slab_base, (void*)end_addr, class_idx, stride);
        fprintf(stderr, "[FREELIST_CORRUPT] next is OUT OF BOUNDS by %td bytes\n",
                distance);
        fflush(stderr);
        assert(0 && "Freelist next pointer out of slab bounds");
        return 0;
    }

    // Getting here means the slab range is non-empty, so stride != 0 and the
    // modulo below cannot divide by zero.
    const ptrdiff_t offset = (ptrdiff_t)(next_addr - base_addr);
    const size_t remainder = (size_t)offset % stride;
    if (remainder != 0) {
        fprintf(stderr, "[FREELIST_MISALIGN] %s: ptr=%p next=%p offset=%td stride=%zu class=%d\n",
                location, ptr, next, offset, stride, class_idx);
        // remainder is a size_t, so it is printed with %zu rather than %td.
        fprintf(stderr, "[FREELIST_MISALIGN] offset %% stride = %zu (should be 0)\n",
                remainder);
        fflush(stderr);
        assert(0 && "Freelist next pointer misaligned");
        return 0;
    }
    return 1;
}
// Basic sanity check that a pointer value looks like a usable user-space address.
//
//   ptr      - pointer to inspect; NULL is accepted
//   location - caller-supplied tag printed in diagnostics
//
// Returns 1 when the pointer passes every check. Any failed check prints a
// diagnostic to stderr and calls abort() directly, so failures stop the
// process regardless of assert settings.
//
// Checks, in order:
//   1. address below 0x1000 (NULL-ish, likely corruption)
//   2. every byte of the address equal to one of the fill patterns
//      0xa2 / 0xcc / 0xdd / 0xfe (use-before-initialization)
//   3. address above 0x7fffffffffff (kernel-space range on x86-64)
// The fill-pattern check runs before the high-address check because a
// pattern-filled 64-bit address is always above 0x7fffffffffff; in the other
// order the more specific diagnostic could never be printed.
static inline int validate_ptr_range(void* ptr, const char* location) {
    if (ptr == NULL) return 1;  // NULL is valid in some contexts

    uintptr_t addr = (uintptr_t)ptr;

    // DIAGNOSTIC: log the first call so it is visible that validation is active.
    // The flag is a function-local static in a static inline function, so each
    // translation unit including this header keeps its own copy.
    static volatile int g_validate_logged = 0;
    if (__builtin_expect(g_validate_logged == 0, 0)) {
        g_validate_logged = 1;
        fprintf(stderr, "[VALIDATE_PTR_RANGE] First call: %s ptr=%p\n", location, ptr);
        fflush(stderr);
    }

    // Check for very low addresses (NULL-ish, likely corruption)
    if (addr < 0x1000) {
        fprintf(stderr, "[PTR_INVALID] %s: ptr=%p is suspiciously low (< 4KB)\n",
                location, ptr);
        fflush(stderr);
        abort();  // Force abort (ignore assert settings)
    }

    // Check for uninitialized/debug fill patterns (0xa2, 0xcc, 0xdd, 0xfe).
    // Iterate over sizeof(addr) so no byte outside addr is read when
    // uintptr_t is narrower than 8 bytes.
    const unsigned char* bytes = (const unsigned char*)&addr;
    int all_bytes_equal = 1;
    for (size_t i = 1; i < sizeof(addr); i++) {
        if (bytes[i] != bytes[0]) {
            all_bytes_equal = 0;
            break;
        }
    }
    if (all_bytes_equal &&
        (bytes[0] == 0xa2 || bytes[0] == 0xcc || bytes[0] == 0xdd || bytes[0] == 0xfe)) {
        fprintf(stderr, "[PTR_INVALID] %s: ptr=%p is uninitialized (pattern 0x%02x)\n",
                location, ptr, (unsigned)bytes[0]);
        fprintf(stderr, "[PTR_INVALID] This indicates use-before-initialization!\n");
        fprintf(stderr, "[PTR_INVALID] Common patterns: 0xa2=ASan, 0xcc=MSVC, 0xdd=freed, 0xfe=heap\n");
        fflush(stderr);
        abort();  // Force abort
    }

    // Check for very high addresses (kernel space on x86-64)
    if (addr > 0x7fffffffffffULL) {
        fprintf(stderr, "[PTR_INVALID] %s: ptr=%p is in kernel space range\n",
                location, ptr);
        fflush(stderr);
        abort();  // Force abort
    }

    return 1;
}
// ============================================================================
// PRIORITY 3: TLS Canaries
// ============================================================================
// Value every TLS canary word is expected to hold.
#define TLS_CANARY_MAGIC 0xDEADBEEFDEADBEEFULL
// External declarations (defined in hakmem_tiny.c).
// NOTE(review): the names suggest these words bracket g_tls_sll_head and
// g_tls_sll_count in thread-local storage; the layout is not visible in this
// header - confirm against hakmem_tiny.c.
extern __thread uint64_t g_tls_canary_before_sll_head;
extern __thread uint64_t g_tls_canary_after_sll_head;
extern __thread uint64_t g_tls_canary_before_sll_count;
extern __thread uint64_t g_tls_canary_after_sll_count;
// Validate TLS canaries (call periodically).
//
//   location - caller-supplied tag printed in diagnostics
//
// Each of the four canaries is compared against TLS_CANARY_MAGIC. A mismatch
// is logged to stderr with the observed and expected values and then trips
// assert(0); with asserts compiled out the function logs and continues with
// the remaining canaries.
//
// Values are cast to unsigned long long and printed with %llx so the format
// matches the argument type wherever uint64_t is not unsigned long.
static inline void validate_tls_canaries(const char* location) {
    if (g_tls_canary_before_sll_head != TLS_CANARY_MAGIC) {
        fprintf(stderr, "[TLS_CANARY] %s: g_tls_sll_head BEFORE canary corrupted: 0x%016llx (expected 0x%016llx)\n",
                location,
                (unsigned long long)g_tls_canary_before_sll_head,
                (unsigned long long)TLS_CANARY_MAGIC);
        fflush(stderr);
        assert(0 && "TLS canary before sll_head corrupted");
    }
    if (g_tls_canary_after_sll_head != TLS_CANARY_MAGIC) {
        fprintf(stderr, "[TLS_CANARY] %s: g_tls_sll_head AFTER canary corrupted: 0x%016llx (expected 0x%016llx)\n",
                location,
                (unsigned long long)g_tls_canary_after_sll_head,
                (unsigned long long)TLS_CANARY_MAGIC);
        fflush(stderr);
        assert(0 && "TLS canary after sll_head corrupted");
    }
    if (g_tls_canary_before_sll_count != TLS_CANARY_MAGIC) {
        fprintf(stderr, "[TLS_CANARY] %s: g_tls_sll_count BEFORE canary corrupted: 0x%016llx (expected 0x%016llx)\n",
                location,
                (unsigned long long)g_tls_canary_before_sll_count,
                (unsigned long long)TLS_CANARY_MAGIC);
        fflush(stderr);
        assert(0 && "TLS canary before sll_count corrupted");
    }
    if (g_tls_canary_after_sll_count != TLS_CANARY_MAGIC) {
        fprintf(stderr, "[TLS_CANARY] %s: g_tls_sll_count AFTER canary corrupted: 0x%016llx (expected 0x%016llx)\n",
                location,
                (unsigned long long)g_tls_canary_after_sll_count,
                (unsigned long long)TLS_CANARY_MAGIC);
        fflush(stderr);
        assert(0 && "TLS canary after sll_count corrupted");
    }
}
// Throttled canary validation: runs validate_tls_canaries() only when the
// caller's operation counter is a multiple of 100 (which includes 0).
//
//   counter  - caller-maintained operation count
//   location - tag forwarded to validate_tls_canaries() for diagnostics
//
// DEBUGGING: the interval was lowered from 1000 to 100 to catch TLS
// corruption faster.
static inline void periodic_canary_check(uint64_t counter, const char* location) {
    const uint64_t ops_past_interval = counter % 100;
    if (ops_past_interval != 0) {
        return;
    }
    validate_tls_canaries(location);
}
// ============================================================================
// PRIORITY 4: Header Write Validation
// ============================================================================
// Sanity-check the arguments of a block-header write before it happens.
//
//   base_ptr  - address the header is about to be written to
//   class_idx - size class of the block
//   location  - caller-supplied tag printed in diagnostics
//
// Three independent checks are made; each failure is logged to stderr and
// trips assert(0). With asserts compiled out, the function logs and carries
// on to the next check.
//   1. base_ptr must not be NULL
//   2. class_idx must be below 7 (class 7 is headerless)
//   3. base_ptr must pass validate_ptr_range()
static inline void validate_header_write(void* base_ptr, uint8_t class_idx, const char* location) {
    const int cls = class_idx;

    if (!base_ptr) {
        fprintf(stderr, "[HEADER_WRITE] %s: NULL base pointer for class=%d\n",
                location, cls);
        fflush(stderr);
        assert(0 && "NULL base pointer in header write");
    }

    if (!(cls < 7)) {  // Class 7 is headerless
        fprintf(stderr, "[HEADER_WRITE] %s: Invalid class_idx=%d for header write (class 7 is headerless)\n",
                location, cls);
        fflush(stderr);
        assert(0 && "Invalid class_idx for header write");
    }

    const int range_ok = validate_ptr_range(base_ptr, location);
    if (range_ok == 0) {
        fprintf(stderr, "[HEADER_WRITE] %s: base_ptr=%p failed range validation\n",
                location, base_ptr);
        fflush(stderr);
        assert(0 && "Header write pointer failed range validation");
    }
}
// ============================================================================
// Debug Counters for Integrity Checks
// ============================================================================
// Counters for the integrity checks, declared here and defined elsewhere.
// NOTE(review): nothing in this header increments them; presumably the call
// sites of the checks do - confirm.
extern _Atomic uint64_t g_integrity_check_class_bounds;
extern _Atomic uint64_t g_integrity_check_freelist;
extern _Atomic uint64_t g_integrity_check_canary;
extern _Atomic uint64_t g_integrity_check_header;
// Print the four integrity-check counters to stderr and flush.
//
// Each counter is read once into a local and printed with %llu after a cast
// to unsigned long long, so the format matches the argument type wherever
// uint64_t is not unsigned long. The reads are independent, so the printed
// values are not a single consistent snapshot if other threads are updating
// the counters.
static inline void integrity_stats_dump(void) {
    const unsigned long long class_bounds = (unsigned long long)g_integrity_check_class_bounds;
    const unsigned long long freelist     = (unsigned long long)g_integrity_check_freelist;
    const unsigned long long canary       = (unsigned long long)g_integrity_check_canary;
    const unsigned long long header       = (unsigned long long)g_integrity_check_header;

    fprintf(stderr, "\n=== INTEGRITY CHECK STATISTICS ===\n");
    fprintf(stderr, "Class bounds checks: %llu\n", class_bounds);
    fprintf(stderr, "Freelist checks: %llu\n", freelist);
    fprintf(stderr, "Canary checks: %llu\n", canary);
    fprintf(stderr, "Header write checks: %llu\n", header);
    fprintf(stderr, "==================================\n");
    fflush(stderr);
}
#endif // HAKMEM_TINY_INTEGRITY_H