2025-11-07 00:37:33 +09:00
|
|
|
|
// hak_wrappers.inc.h — malloc/free/calloc/realloc wrappers (LD_PRELOAD-aware)
|
|
|
|
|
|
#ifndef HAK_WRAPPERS_INC_H
|
|
|
|
|
|
#define HAK_WRAPPERS_INC_H
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
|
|
|
|
|
|
|
|
|
|
|
|
// Sanitizer/diagnostic builds: bypass hakmem allocator completely.
|
|
|
|
|
|
void* malloc(size_t size) {
|
|
|
|
|
|
extern void* __libc_malloc(size_t);
|
|
|
|
|
|
return __libc_malloc(size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void free(void* ptr) {
|
|
|
|
|
|
if (!ptr) return;
|
|
|
|
|
|
extern void __libc_free(void*);
|
|
|
|
|
|
__libc_free(ptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void* calloc(size_t nmemb, size_t size) {
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void* realloc(void* ptr, size_t size) {
|
|
|
|
|
|
extern void* __libc_realloc(void*, size_t);
|
|
|
|
|
|
return __libc_realloc(ptr, size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
2025-11-11 00:02:24 +09:00
|
|
|
|
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
|
|
|
|
|
|
#include "front_gate_classifier.h" // Box FG: pointer classification (header/reg)
|
|
|
|
|
|
|
2025-11-07 00:37:33 +09:00
|
|
|
|
// malloc wrapper - intercepts system malloc() calls
|
|
|
|
|
|
__thread uint64_t g_malloc_total_calls = 0;
|
|
|
|
|
|
__thread uint64_t g_malloc_tiny_size_match = 0;
|
|
|
|
|
|
__thread uint64_t g_malloc_fast_path_tried = 0;
|
|
|
|
|
|
__thread uint64_t g_malloc_fast_path_null = 0;
|
|
|
|
|
|
__thread uint64_t g_malloc_slow_path = 0;
|
|
|
|
|
|
|
|
|
|
|
|
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
|
|
|
|
|
|
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
// CRITICAL FIX (BUG #10): Use cached g_jemalloc_loaded instead of calling hak_jemalloc_loaded()
|
|
|
|
|
|
// The function call version triggers infinite recursion: malloc → hak_jemalloc_loaded → dlopen → malloc
|
|
|
|
|
|
extern int g_jemalloc_loaded; // Cached during hak_init_impl(), defined in hakmem.c
|
|
|
|
|
|
|
2025-11-07 00:37:33 +09:00
|
|
|
|
void* malloc(size_t size) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
// CRITICAL FIX (BUG #7): Increment lock depth FIRST, before ANY libc calls
|
|
|
|
|
|
// This prevents infinite recursion when getenv/fprintf/dlopen call malloc
|
|
|
|
|
|
g_hakmem_lock_depth++;
|
|
|
|
|
|
|
|
|
|
|
|
// Guard against recursion during initialization
|
2025-11-07 00:37:33 +09:00
|
|
|
|
if (__builtin_expect(g_initializing != 0, 0)) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
g_hakmem_lock_depth--;
|
2025-11-07 00:37:33 +09:00
|
|
|
|
extern void* __libc_malloc(size_t);
|
|
|
|
|
|
return __libc_malloc(size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
// Now safe to call getenv/fprintf/dlopen (will use __libc_malloc if needed)
|
2025-11-07 01:15:28 +09:00
|
|
|
|
// Cache getenv result to avoid 8.51% CPU overhead on hot path
|
|
|
|
|
|
static _Atomic int debug_enabled = -1; // -1 = uninitialized
|
2025-11-07 00:37:33 +09:00
|
|
|
|
static _Atomic int debug_count = 0;
|
2025-11-07 01:15:28 +09:00
|
|
|
|
if (__builtin_expect(debug_enabled < 0, 0)) {
|
|
|
|
|
|
debug_enabled = (getenv("HAKMEM_SFC_DEBUG") != NULL) ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (debug_enabled && debug_count < 100) {
|
2025-11-07 00:37:33 +09:00
|
|
|
|
int n = atomic_fetch_add(&debug_count, 1);
|
|
|
|
|
|
if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
g_hakmem_lock_depth--;
|
2025-11-07 00:37:33 +09:00
|
|
|
|
extern void* __libc_malloc(size_t);
|
|
|
|
|
|
return __libc_malloc(size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int ld_mode = hak_ld_env_mode();
|
|
|
|
|
|
if (ld_mode) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
2025-11-07 00:37:33 +09:00
|
|
|
|
extern void* __libc_malloc(size_t);
|
|
|
|
|
|
return __libc_malloc(size);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!g_initialized) { hak_init(); }
|
|
|
|
|
|
if (g_initializing) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
g_hakmem_lock_depth--;
|
2025-11-07 00:37:33 +09:00
|
|
|
|
extern void* __libc_malloc(size_t);
|
|
|
|
|
|
return __libc_malloc(size);
|
|
|
|
|
|
}
|
2025-11-07 01:15:28 +09:00
|
|
|
|
// Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path
|
|
|
|
|
|
static _Atomic int ld_safe_mode = -1; // -1 = uninitialized
|
|
|
|
|
|
if (__builtin_expect(ld_safe_mode < 0, 0)) {
|
|
|
|
|
|
const char* lds = getenv("HAKMEM_LD_SAFE");
|
|
|
|
|
|
ld_safe_mode = (lds ? atoi(lds) : 1);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (ld_safe_mode >= 2 || size > TINY_MAX_SIZE) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
g_hakmem_lock_depth--;
|
2025-11-07 00:37:33 +09:00
|
|
|
|
extern void* __libc_malloc(size_t);
|
|
|
|
|
|
return __libc_malloc(size);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void* ptr = hak_alloc_at(size, HAK_CALLSITE());
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
return ptr;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void free(void* ptr) {
|
|
|
|
|
|
atomic_fetch_add_explicit(&g_free_wrapper_calls, 1, memory_order_relaxed);
|
|
|
|
|
|
if (!ptr) return;
|
2025-11-11 00:02:24 +09:00
|
|
|
|
do { static int on=-1; if (on==-1){ const char* e=getenv("HAKMEM_FREE_WRAP_TRACE"); on=(e&&*e&&*e!='0')?1:0;} if(on){ fprintf(stderr,"[WRAP_FREE_ENTER] ptr=%p depth=%d init=%d\n", ptr, g_hakmem_lock_depth, g_initializing); } } while(0);
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
|
// Debug safety: guard obviously invalid tiny integers to avoid libc crash and collect trace
|
|
|
|
|
|
if ((uintptr_t)ptr < 4096) {
|
|
|
|
|
|
ptr_trace_dump_now("wrap_small_ptr");
|
|
|
|
|
|
fprintf(stderr, "[FREE_SMALL_PTR] ignore ptr=%p (likely header-corruption sentinel)\n", ptr);
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
// Classify pointer BEFORE early libc fallbacks to avoid misrouting Tiny pointers
|
|
|
|
|
|
// This is safe: classifier uses header probe and registry; does not allocate.
|
|
|
|
|
|
int is_hakmem_owned = 0;
|
|
|
|
|
|
{
|
|
|
|
|
|
ptr_classification_t c = classify_ptr(ptr);
|
|
|
|
|
|
switch (c.kind) {
|
|
|
|
|
|
case PTR_KIND_TINY_HEADER:
|
|
|
|
|
|
case PTR_KIND_TINY_HEADERLESS:
|
|
|
|
|
|
case PTR_KIND_POOL_TLS:
|
|
|
|
|
|
is_hakmem_owned = 1; break;
|
|
|
|
|
|
default: break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (is_hakmem_owned) {
|
|
|
|
|
|
// Route to hak_free_at even if lock_depth>0(ログ抑制のためptr_traceのみ使用)
|
|
|
|
|
|
g_hakmem_lock_depth++;
|
|
|
|
|
|
hak_free_at(ptr, 0, HAK_CALLSITE());
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
2025-11-11 01:47:06 +09:00
|
|
|
|
// Front Gate libc bypass detection (quiet in release)
|
2025-11-11 00:02:24 +09:00
|
|
|
|
static _Atomic uint64_t fg_libc_bypass_count = 0;
|
2025-11-11 01:47:06 +09:00
|
|
|
|
|
|
|
|
|
|
if (g_hakmem_lock_depth > 0) {
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-11 00:02:24 +09:00
|
|
|
|
uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
|
2025-11-11 01:47:06 +09:00
|
|
|
|
if (count < 10) {
|
2025-11-11 00:02:24 +09:00
|
|
|
|
fprintf(stderr, "[FG_LIBC_BYPASS] lockdepth=%d count=%llu ptr=%p\n", g_hakmem_lock_depth, (unsigned long long)count, ptr);
|
|
|
|
|
|
}
|
2025-11-11 01:47:06 +09:00
|
|
|
|
#else
|
|
|
|
|
|
(void)fg_libc_bypass_count;
|
|
|
|
|
|
#endif
|
2025-11-11 10:00:36 +09:00
|
|
|
|
// Safety: If this is a HAKMEM-owned header allocation, free raw correctly
|
|
|
|
|
|
do {
|
|
|
|
|
|
void* raw = (char*)ptr - HEADER_SIZE;
|
|
|
|
|
|
int safe_same_page = (((uintptr_t)ptr & 0xFFFu) >= HEADER_SIZE);
|
|
|
|
|
|
if (!safe_same_page) {
|
|
|
|
|
|
if (!hak_is_memory_readable(raw)) break;
|
|
|
|
|
|
}
|
|
|
|
|
|
AllocHeader* hdr = (AllocHeader*)raw;
|
|
|
|
|
|
if (hdr->magic == HAKMEM_MAGIC) {
|
|
|
|
|
|
// Dispatch based on allocation method
|
|
|
|
|
|
if (hdr->method == ALLOC_METHOD_MALLOC) {
|
|
|
|
|
|
extern void __libc_free(void*);
|
|
|
|
|
|
ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_malloc");
|
|
|
|
|
|
__libc_free(raw);
|
|
|
|
|
|
return;
|
|
|
|
|
|
} else if (hdr->method == ALLOC_METHOD_MMAP) {
|
|
|
|
|
|
ptr_trace_dump_now("wrap_libc_lockdepth_hak_hdr_mmap");
|
|
|
|
|
|
hkm_sys_munmap(raw, hdr->size);
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
} while (0);
|
|
|
|
|
|
// Unknown pointer or non-HAKMEM: fall back to libc free(ptr)
|
2025-11-11 01:47:06 +09:00
|
|
|
|
extern void __libc_free(void*);
|
|
|
|
|
|
ptr_trace_dump_now("wrap_libc_lockdepth");
|
|
|
|
|
|
__libc_free(ptr);
|
|
|
|
|
|
return;
|
2025-11-11 00:02:24 +09:00
|
|
|
|
}
|
2025-11-11 01:47:06 +09:00
|
|
|
|
if (__builtin_expect(g_initializing != 0, 0)) {
|
|
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-11 00:02:24 +09:00
|
|
|
|
uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
|
2025-11-11 01:47:06 +09:00
|
|
|
|
if (count < 10) {
|
2025-11-11 00:02:24 +09:00
|
|
|
|
fprintf(stderr, "[FG_LIBC_BYPASS] init=%d count=%llu ptr=%p\n", g_initializing, (unsigned long long)count, ptr);
|
|
|
|
|
|
}
|
2025-11-11 01:47:06 +09:00
|
|
|
|
#endif
|
|
|
|
|
|
extern void __libc_free(void*);
|
|
|
|
|
|
ptr_trace_dump_now("wrap_libc_init");
|
|
|
|
|
|
__libc_free(ptr);
|
|
|
|
|
|
return;
|
2025-11-11 00:02:24 +09:00
|
|
|
|
}
|
|
|
|
|
|
if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_force"); __libc_free(ptr); return; }
|
2025-11-07 00:37:33 +09:00
|
|
|
|
if (hak_ld_env_mode()) {
|
2025-11-11 00:02:24 +09:00
|
|
|
|
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_jemalloc"); __libc_free(ptr); return; }
|
2025-11-07 00:37:33 +09:00
|
|
|
|
if (!g_initialized) { hak_init(); }
|
2025-11-11 00:02:24 +09:00
|
|
|
|
if (g_initializing) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_init"); __libc_free(ptr); return; }
|
2025-11-07 00:37:33 +09:00
|
|
|
|
}
|
|
|
|
|
|
g_hakmem_lock_depth++;
|
|
|
|
|
|
hak_free_at(ptr, 0, HAK_CALLSITE());
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void* calloc(size_t nmemb, size_t size) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
// CRITICAL FIX (BUG #8): Increment lock depth FIRST, before ANY libc calls
|
|
|
|
|
|
g_hakmem_lock_depth++;
|
|
|
|
|
|
|
|
|
|
|
|
// Early check for recursion (lock depth already incremented by outer call)
|
|
|
|
|
|
if (g_hakmem_lock_depth > 1) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (__builtin_expect(g_initializing != 0, 0)) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Overflow check
|
|
|
|
|
|
if (size != 0 && nmemb > (SIZE_MAX / size)) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
errno = ENOMEM;
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (__builtin_expect(hak_force_libc_alloc(), 0)) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-07 00:37:33 +09:00
|
|
|
|
int ld_mode = hak_ld_env_mode();
|
|
|
|
|
|
if (ld_mode) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
2025-11-07 00:37:33 +09:00
|
|
|
|
if (!g_initialized) { hak_init(); }
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
if (g_initializing) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
2025-11-07 01:15:28 +09:00
|
|
|
|
// Reuse cached ld_safe_mode from malloc (same static variable scope won't work, use inline function instead)
|
|
|
|
|
|
// For now, duplicate the caching logic
|
|
|
|
|
|
static _Atomic int ld_safe_mode_calloc = -1;
|
|
|
|
|
|
if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) {
|
|
|
|
|
|
const char* lds = getenv("HAKMEM_LD_SAFE");
|
|
|
|
|
|
ld_safe_mode_calloc = (lds ? atoi(lds) : 1);
|
|
|
|
|
|
}
|
2025-11-07 00:37:33 +09:00
|
|
|
|
size_t total = nmemb * size;
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) {
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
extern void* __libc_calloc(size_t, size_t);
|
|
|
|
|
|
return __libc_calloc(nmemb, size);
|
|
|
|
|
|
}
|
2025-11-07 00:37:33 +09:00
|
|
|
|
}
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
|
2025-11-07 00:37:33 +09:00
|
|
|
|
size_t total_size = nmemb * size;
|
|
|
|
|
|
void* ptr = hak_alloc_at(total_size, HAK_CALLSITE());
|
|
|
|
|
|
if (ptr) { memset(ptr, 0, total_size); }
|
|
|
|
|
|
g_hakmem_lock_depth--;
|
|
|
|
|
|
return ptr;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void* realloc(void* ptr, size_t size) {
|
|
|
|
|
|
if (g_hakmem_lock_depth > 0) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
|
|
|
|
|
|
if (__builtin_expect(g_initializing != 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
|
|
|
|
|
|
if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
|
|
|
|
|
|
int ld_mode = hak_ld_env_mode();
|
|
|
|
|
|
if (ld_mode) {
|
feat: Phase 7 + Phase 2 - Massive performance & stability improvements
Performance Achievements:
- Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed)
- Single-thread: +24% (2.71M → 3.36M ops/s Larson)
- 4T stability: 0% → 95% (19/20 success rate)
- Overall: 91.3% of System malloc average (target was 40-55%) ✓
Phase 7 (Tasks 1-3): Core Optimizations
- Task 1: Header validation removal (Region-ID direct lookup)
- Task 2: Aggressive inline (TLS cache access optimization)
- Task 3: Pre-warm TLS cache (eliminate cold-start penalty)
Result: +180-280% improvement, 85-146% of System malloc
Critical Bug Fixes:
- Fix 64B allocation crash (size-to-class +1 for header)
- Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11)
- Remove malloc fallback (30% → 50% stability)
Phase 2a: SuperSlab Dynamic Expansion (CRITICAL)
- Implement mimalloc-style chunk linking
- Unlimited slab expansion (no more OOM at 32 slabs)
- Fix chunk initialization bug (bitmap=0x00000001 after expansion)
Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h
Result: 50% → 95% stability (19/20 4T success)
Phase 2b: TLS Cache Adaptive Sizing
- Dynamic capacity: 16-2048 slots based on usage
- High-water mark tracking + exponential growth/shrink
- Expected: +3-10% performance, -30-50% memory
Files: core/tiny_adaptive_sizing.c/h (new)
Phase 2c: BigCache Dynamic Hash Table
- Migrate from fixed 256×8 array to dynamic hash table
- Auto-resize: 256 → 512 → 1024 → 65,536 buckets
- Improved hash function (FNV-1a) + collision chaining
Files: core/hakmem_bigcache.c/h
Expected: +10-20% cache hit rate
Design Flaws Analysis:
- Identified 6 components with fixed-capacity bottlenecks
- SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM)
- Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters)
Documentation:
- 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md)
- Implementation guides, test results, production readiness
- Bug fix reports, root cause analysis
Build System:
- Makefile: phase7 targets, PREWARM_TLS flag
- Auto dependency generation (-MMD -MP) for .inc files
Known Issues:
- 4T stability: 19/20 (95%) - investigating 1 failure for 100%
- L2.5 Pool dynamic sharding: design only (needs 2-3 days integration)
🤖 Generated with Claude Code (https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
|
|
|
|
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
|
2025-11-07 00:37:33 +09:00
|
|
|
|
if (!g_initialized) { hak_init(); }
|
|
|
|
|
|
if (g_initializing) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
|
|
|
|
|
|
}
|
|
|
|
|
|
if (ptr == NULL) { return malloc(size); }
|
|
|
|
|
|
if (size == 0) { free(ptr); return NULL; }
|
|
|
|
|
|
void* new_ptr = malloc(size);
|
|
|
|
|
|
if (!new_ptr) return NULL;
|
|
|
|
|
|
memcpy(new_ptr, ptr, size);
|
|
|
|
|
|
free(ptr);
|
|
|
|
|
|
return new_ptr;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif // HAKMEM_FORCE_LIBC_ALLOC_BUILD
|
|
|
|
|
|
|
|
|
|
|
|
#endif // HAK_WRAPPERS_INC_H
|