Files
hakmem/core/hakmem_tiny_integrity.h
Moe Charm (CI) 0543642dea Phase 1-3: Performance optimization - 12.7x improvement (mimalloc strategy)
## Performance Results

**Before (Phase 0)**: 627K ops/s (Random Mixed 256B, 100K iterations)
**After (Phase 3)**: 7.97M ops/s (Random Mixed 256B, 100K iterations)
**Improvement**: 12.7x faster 🎉

### Phase Breakdown
- **Phase 1 (Flag Enablement)**: 627K → 812K ops/s (+30%)
  - HEADER_CLASSIDX=1 (default ON)
  - AGGRESSIVE_INLINE=1 (default ON)
  - PREWARM_TLS=1 (default ON)

- **Phase 2 (Inline Integration)**: 812K → 7.01M ops/s (8.6x)
  - TINY_ALLOC_FAST_POP_INLINE macro usage in hot paths
  - Eliminates function call overhead (5-10 cycles saved per alloc)

- **Phase 3 (Debug Overhead Removal)**: 7.01M → 7.97M ops/s (+14%)
  - HAK_CHECK_CLASS_IDX → compile-time no-op in release builds
  - Debug counters eliminated (atomic ops removed from hot path)
  - HAK_RET_ALLOC → ultra-fast inline macro (3-4 instructions)

## Implementation Strategy

Based on Task agent's mimalloc performance strategy analysis:
1. Root cause: Phase 7 flags were disabled by default (Makefile defaults)
2. Solution: Enable Phase 7 optimizations + aggressive inline + debug removal
3. Result: Matches optimization #1 and #2 expectations (+10-15% combined)

## Files Modified

### Core Changes
- **Makefile**: Phase 7 flags now default to ON (lines 131, 141, 151)
- **core/tiny_alloc_fast.inc.h**:
  - Aggressive inline macro integration (lines 589-595, 612-618)
  - Debug counter elimination (lines 191-203, 536-565)
- **core/hakmem_tiny_integrity.h**:
  - HAK_CHECK_CLASS_IDX → no-op in release (lines 15-29)
- **core/hakmem_tiny.c**:
  - HAK_RET_ALLOC → ultra-fast inline in release (lines 155-164)

### Documentation
- **OPTIMIZATION_REPORT_2025_11_12.md**: Comprehensive 300+ line analysis
- **OPTIMIZATION_QUICK_SUMMARY.md**: Executive summary with benchmarks

## Testing

- 100K iterations: 7.97M ops/s (stable, average of 5 runs)
- Stability: Fix #16 architecture preserved (100% pass rate maintained)
- Build: Clean compile with Phase 7 flags enabled

## Next Steps

- [ ] Larson benchmark comparison (HAKMEM vs mimalloc vs System)
- [ ] Fixed 256B test to match Phase 7 conditions
- [ ] Multi-threaded stability verification (1T-4T)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 13:57:46 +09:00

218 lines
9.2 KiB
C

#ifndef HAKMEM_TINY_INTEGRITY_H
#define HAKMEM_TINY_INTEGRITY_H
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "hakmem_tiny.h"
// ============================================================================
// PRIORITY 1: TLS Array Bounds Checks
// ============================================================================
// Bounds-check class_idx before it is used to index a per-class TLS array.
//
// Release builds (HAKMEM_BUILD_RELEASE != 0): the check is compiled out; the
// arguments are evaluated once and discarded.
// Debug builds: an index outside [0, TINY_NUM_CLASSES) is reported on stderr
// and the process is terminated (assert(), then abort() in case assert() is
// compiled out by NDEBUG).
//
// class_idx is captured into a local before it is tested, so the argument is
// evaluated exactly once in both build modes (an argument with side effects,
// e.g. idx++, behaves the same in debug and release) and unsigned index types
// do not trigger an always-false "< 0" comparison. label is only evaluated
// when the check fails.
#if HAKMEM_BUILD_RELEASE
#define HAK_CHECK_CLASS_IDX(class_idx, label) do { (void)(class_idx); (void)(label); } while(0)
#else
#define HAK_CHECK_CLASS_IDX(class_idx, label) do { \
    const long long hak_cci_idx_ = (long long)(class_idx); \
    if (__builtin_expect(hak_cci_idx_ < 0 || hak_cci_idx_ >= (long long)(TINY_NUM_CLASSES), 0)) { \
        fprintf(stderr, "[%s] FATAL: class_idx=%lld out of bounds [0,%d) at %s:%d\n", \
                (label), hak_cci_idx_, (int)(TINY_NUM_CLASSES), __FILE__, __LINE__); \
        fflush(stderr); \
        assert(0 && "TLS array index out of bounds"); \
        abort(); \
    } \
} while(0)
#endif
// ============================================================================
// PRIORITY 2: Freelist Integrity Checks
// ============================================================================
// Validate that a freelist "next" link points at a block inside its slab.
//
//   ptr        - block whose link is being checked (used for logging only)
//   next       - link value to validate; NULL marks end-of-list and is valid
//   slab_base  - address of the first block of the slab
//   stride     - distance in bytes between consecutive blocks
//   class_idx  - size class (used for logging only)
//   num_blocks - number of blocks; the accepted range is
//                [slab_base, slab_base + num_blocks * stride)
//   location   - caller tag included in every diagnostic line
//
// Returns 1 when next is NULL or a stride-aligned address inside the slab.
// Otherwise the problem is logged to stderr and assert() fires; 0 is returned
// only when assert() is compiled out (NDEBUG).
static inline int validate_freelist_next(void* ptr, void* next,
                                         void* slab_base, size_t stride,
                                         uint8_t class_idx,
                                         size_t num_blocks,
                                         const char* location) {
    if (next == NULL) return 1;  // NULL is valid (end of list)

    void* slab_end = (uint8_t*)slab_base + (num_blocks * stride);

    // "next" may be a corrupted value that does not point into the slab at
    // all, so the range arithmetic is done on integers rather than with
    // pointer comparison/subtraction, which is only defined within one object.
    const uintptr_t base_addr = (uintptr_t)slab_base;
    const uintptr_t end_addr = (uintptr_t)slab_end;
    const uintptr_t next_addr = (uintptr_t)next;

    if (next_addr < base_addr || next_addr >= end_addr) {
        const ptrdiff_t distance = (next_addr < base_addr)
            ? (ptrdiff_t)(base_addr - next_addr)
            : (ptrdiff_t)(next_addr - end_addr);
        fprintf(stderr, "[FREELIST_CORRUPT] %s: ptr=%p next=%p slab=[%p,%p) class=%d stride=%zu\n",
                location, ptr, next, slab_base, slab_end, class_idx, stride);
        fprintf(stderr, "[FREELIST_CORRUPT] next is OUT OF BOUNDS by %td bytes\n",
                distance);
        fflush(stderr);
        assert(0 && "Freelist next pointer out of slab bounds");
        return 0;
    }

    // Additional check: next must sit on a block boundary.
    // stride cannot be 0 here: a zero stride makes the slab range empty, so
    // every non-NULL next was already rejected by the bounds check above.
    const ptrdiff_t offset = (ptrdiff_t)(next_addr - base_addr);
    const size_t remainder = (size_t)offset % stride;
    if (remainder != 0) {
        fprintf(stderr, "[FREELIST_MISALIGN] %s: ptr=%p next=%p offset=%td stride=%zu class=%d\n",
                location, ptr, next, offset, stride, class_idx);
        // The remainder is a size_t, so it is printed with %zu.
        fprintf(stderr, "[FREELIST_MISALIGN] offset %% stride = %zu (should be 0)\n",
                remainder);
        fflush(stderr);
        assert(0 && "Freelist next pointer misaligned");
        return 0;
    }

    return 1;
}
// Basic sanity check on a pointer value (it is never dereferenced).
//
//   ptr      - pointer to inspect; NULL is accepted
//   location - caller tag included in every diagnostic line
//
// Returns 1 when the pointer looks plausible. Every rejection path logs to
// stderr and calls abort() directly, so rejection does not depend on assert()
// being enabled and the function never returns 0.
//
// Checks, in order:
//   1. address below 4 KiB (NULL-ish, likely corruption)
//   2. address whose bytes are all one of the fill values 0xa2/0xcc/0xdd/0xfe
//   3. address above 0x7fffffffffff (kernel half on x86-64)
// The fill-pattern check runs before the high-address check because every
// recognised pattern has its top bit set; in the opposite order the
// high-address check always fired first on 64-bit and the more specific
// "uninitialized" diagnostic could never be printed.
//
// NOTE(review): the 0x7fffffffffff limit is x86-64 specific; targets with a
// larger user address space would need a different bound - confirm before
// porting.
static inline int validate_ptr_range(void* ptr, const char* location) {
    if (ptr == NULL) return 1;  // NULL is valid in some contexts

    uintptr_t addr = (uintptr_t)ptr;

    // DIAGNOSTIC: one-time log to confirm this function is actually running.
    // The flag is a plain volatile int with no synchronization, and as a
    // static local of a static inline function it is separate per
    // translation unit, so the line can appear more than once.
    static volatile int g_validate_logged = 0;
    if (__builtin_expect(g_validate_logged == 0, 0)) {
        g_validate_logged = 1;
        fprintf(stderr, "[VALIDATE_PTR_RANGE] First call: %s ptr=%p\n", location, ptr);
        fflush(stderr);
    }

    // Very low addresses (NULL-ish, likely corruption)
    if (addr < 0x1000) {
        fprintf(stderr, "[PTR_INVALID] %s: ptr=%p is suspiciously low (< 4KB)\n",
                location, ptr);
        fflush(stderr);
        abort();  // Force abort (ignore assert settings)
    }

    // Uninitialized/debug fill patterns: every byte of the address is equal.
    // Iterate over sizeof addr so nothing is read past the variable on
    // targets where uintptr_t is narrower than 8 bytes.
    const unsigned char* bytes = (const unsigned char*)&addr;
    int all_same = 1;
    for (size_t i = 1; i < sizeof addr; i++) {
        if (bytes[i] != bytes[0]) {
            all_same = 0;
            break;
        }
    }
    if (all_same &&
        (bytes[0] == 0xa2 || bytes[0] == 0xcc || bytes[0] == 0xdd || bytes[0] == 0xfe)) {
        fprintf(stderr, "[PTR_INVALID] %s: ptr=%p is uninitialized (pattern 0x%02x)\n",
                location, ptr, (unsigned)bytes[0]);
        fprintf(stderr, "[PTR_INVALID] This indicates use-before-initialization!\n");
        fprintf(stderr, "[PTR_INVALID] Common patterns: 0xa2=ASan, 0xcc=MSVC, 0xdd=freed, 0xfe=heap\n");
        fflush(stderr);
        abort();  // Force abort
    }

    // Very high addresses (kernel space on x86-64)
    if (addr > 0x7fffffffffffULL) {
        fprintf(stderr, "[PTR_INVALID] %s: ptr=%p is in kernel space range\n",
                location, ptr);
        fflush(stderr);
        abort();  // Force abort
    }

    return 1;
}
// ============================================================================
// PRIORITY 3: TLS Canaries
// ============================================================================
// Value every TLS canary is expected to hold.
#define TLS_CANARY_MAGIC 0xDEADBEEFDEADBEEFULL

// Thread-local canary words (defined in hakmem_tiny.c). Going by their names
// they bracket the g_tls_sll_head and g_tls_sll_count TLS arrays; the actual
// placement is decided by the definitions, which are not in this header.
extern __thread uint64_t g_tls_canary_before_sll_head;
extern __thread uint64_t g_tls_canary_after_sll_head;
extern __thread uint64_t g_tls_canary_before_sll_count;
extern __thread uint64_t g_tls_canary_after_sll_count;

// Log one corrupted canary to stderr and flush.
// `which` names the guarded array and side, e.g. "g_tls_sll_head BEFORE".
// Values are cast to unsigned long long and printed with %llx so the format
// matches the argument type on every ABI (uint64_t is not always
// unsigned long, and TLS_CANARY_MAGIC is an unsigned long long constant).
static inline void tls_canary_report_corrupt(const char* location,
                                             const char* which,
                                             uint64_t observed) {
    fprintf(stderr, "[TLS_CANARY] %s: %s canary corrupted: 0x%016llx (expected 0x%016llx)\n",
            location, which,
            (unsigned long long)observed,
            (unsigned long long)TLS_CANARY_MAGIC);
    fflush(stderr);
}

// Validate TLS canaries (call periodically).
//
// Compares each of the calling thread's four canaries against
// TLS_CANARY_MAGIC. A mismatch is logged with `location` as the caller tag
// and then asserted on; when assert() is compiled out (NDEBUG) the function
// only logs and goes on to check the remaining canaries.
static inline void validate_tls_canaries(const char* location) {
    if (g_tls_canary_before_sll_head != TLS_CANARY_MAGIC) {
        tls_canary_report_corrupt(location, "g_tls_sll_head BEFORE",
                                  g_tls_canary_before_sll_head);
        assert(0 && "TLS canary before sll_head corrupted");
    }
    if (g_tls_canary_after_sll_head != TLS_CANARY_MAGIC) {
        tls_canary_report_corrupt(location, "g_tls_sll_head AFTER",
                                  g_tls_canary_after_sll_head);
        assert(0 && "TLS canary after sll_head corrupted");
    }
    if (g_tls_canary_before_sll_count != TLS_CANARY_MAGIC) {
        tls_canary_report_corrupt(location, "g_tls_sll_count BEFORE",
                                  g_tls_canary_before_sll_count);
        assert(0 && "TLS canary before sll_count corrupted");
    }
    if (g_tls_canary_after_sll_count != TLS_CANARY_MAGIC) {
        tls_canary_report_corrupt(location, "g_tls_sll_count AFTER",
                                  g_tls_canary_after_sll_count);
        assert(0 && "TLS canary after sll_count corrupted");
    }
}
// Throttled canary validation: runs validate_tls_canaries(location) only
// when `counter` is a multiple of 100 (which includes counter == 0).
// DEBUGGING: the interval was lowered from 1000 to 100 to catch TLS
// corruption faster.
static inline void periodic_canary_check(uint64_t counter, const char* location) {
    const uint64_t ops_since_check = counter % 100;
    if (ops_since_check != 0) {
        return;  // not due yet
    }
    validate_tls_canaries(location);
}
// ============================================================================
// PRIORITY 4: Header Write Validation
// ============================================================================
// Sanity-check the arguments of a block-header write.
//
//   base_ptr  - address the header is about to be written to
//   class_idx - size class being recorded; must be below 7 (class 7 is
//               headerless)
//   location  - caller tag included in every diagnostic line
//
// Each failed check is logged to stderr and then asserted on; with assert()
// compiled out (NDEBUG) the function only logs and continues with the next
// check. The checks run in a fixed order: NULL pointer, class index, pointer
// range. validate_ptr_range() may itself abort() on a bad pointer.
static inline void validate_header_write(void* base_ptr, uint8_t class_idx, const char* location) {
    const unsigned first_headerless_class = 7;

    if (!base_ptr) {
        fprintf(stderr, "[HEADER_WRITE] %s: NULL base pointer for class=%d\n",
                location, class_idx);
        fflush(stderr);
        assert(0 && "NULL base pointer in header write");
    }

    if (class_idx >= first_headerless_class) {
        fprintf(stderr, "[HEADER_WRITE] %s: Invalid class_idx=%d for header write (class 7 is headerless)\n",
                location, class_idx);
        fflush(stderr);
        assert(0 && "Invalid class_idx for header write");
    }

    const int range_ok = validate_ptr_range(base_ptr, location);
    if (range_ok == 0) {
        fprintf(stderr, "[HEADER_WRITE] %s: base_ptr=%p failed range validation\n",
                location, base_ptr);
        fflush(stderr);
        assert(0 && "Header write pointer failed range validation");
    }
}
// ============================================================================
// Debug Counters for Integrity Checks
// ============================================================================
// Counters for the integrity checks, one per check category. They are only
// declared here; the definitions and the code that increments them live
// outside this header.
extern _Atomic uint64_t g_integrity_check_class_bounds;
extern _Atomic uint64_t g_integrity_check_freelist;
extern _Atomic uint64_t g_integrity_check_canary;
extern _Atomic uint64_t g_integrity_check_header;

// Print the four integrity-check counters to stderr and flush.
//
// Each counter is read once into an unsigned long long local and printed
// with %llu, so the format matches the argument type on every ABI (uint64_t
// is not always unsigned long). The four reads are independent loads, not
// one consistent snapshot across counters.
static inline void integrity_stats_dump(void) {
    const unsigned long long class_bounds = g_integrity_check_class_bounds;
    const unsigned long long freelist = g_integrity_check_freelist;
    const unsigned long long canary = g_integrity_check_canary;
    const unsigned long long header = g_integrity_check_header;

    fprintf(stderr, "\n=== INTEGRITY CHECK STATISTICS ===\n");
    fprintf(stderr, "Class bounds checks: %llu\n", class_bounds);
    fprintf(stderr, "Freelist checks: %llu\n", freelist);
    fprintf(stderr, "Canary checks: %llu\n", canary);
    fprintf(stderr, "Header write checks: %llu\n", header);
    fprintf(stderr, "==================================\n");
    fflush(stderr);
}
#endif // HAKMEM_TINY_INTEGRITY_H