2025-11-09 18:55:50 +09:00
|
|
|
hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
|
|
|
|
|
core/hakmem_config.h core/hakmem_features.h core/hakmem_internal.h \
|
2025-12-01 16:37:59 +09:00
|
|
|
core/hakmem_sys.h core/hakmem_whale.h core/box/ptr_type_box.h \
|
2025-12-03 10:34:39 +09:00
|
|
|
core/hakmem_bigcache.h core/hakmem_pool.h \
|
|
|
|
|
core/box/hak_lane_classify.inc.h core/hakmem_l25_pool.h \
|
2025-12-01 16:37:59 +09:00
|
|
|
core/hakmem_policy.h core/hakmem_learner.h core/hakmem_size_hist.h \
|
|
|
|
|
core/hakmem_ace.h core/hakmem_site_rules.h core/hakmem_tiny.h \
|
|
|
|
|
core/hakmem_trace.h core/hakmem_tiny_mini_mag.h \
|
|
|
|
|
core/hakmem_tiny_superslab.h core/superslab/superslab_types.h \
|
|
|
|
|
core/hakmem_tiny_superslab_constants.h core/superslab/superslab_inline.h \
|
|
|
|
|
core/superslab/superslab_types.h core/superslab/../tiny_box_geometry.h \
|
2025-11-26 12:33:49 +09:00
|
|
|
core/superslab/../hakmem_tiny_superslab_constants.h \
|
2025-12-10 09:08:18 +09:00
|
|
|
core/superslab/../hakmem_tiny_config.h \
|
|
|
|
|
core/superslab/../hakmem_super_registry.h \
|
|
|
|
|
core/superslab/../hakmem_tiny_superslab.h \
|
|
|
|
|
core/superslab/../box/ss_addr_map_box.h \
|
|
|
|
|
core/superslab/../box/../hakmem_build_flags.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/superslab/../box/super_reg_box.h \
|
|
|
|
|
core/superslab/../box/ss_pt_lookup_box.h \
|
|
|
|
|
core/superslab/../box/ss_pt_types_box.h \
|
|
|
|
|
core/superslab/../box/ss_pt_env_box.h \
|
|
|
|
|
core/superslab/../box/ss_pt_env_box.h core/tiny_debug_ring.h \
|
2025-11-26 12:33:49 +09:00
|
|
|
core/tiny_remote.h core/hakmem_tiny_superslab_constants.h \
|
2025-12-03 10:34:39 +09:00
|
|
|
core/tiny_fastcache.h core/hakmem_env_cache.h \
|
|
|
|
|
core/box/tiny_next_ptr_box.h core/hakmem_tiny_config.h \
|
|
|
|
|
core/tiny_nextptr.h core/tiny_region_id.h core/tiny_box_geometry.h \
|
2025-12-10 09:08:18 +09:00
|
|
|
core/ptr_track.h core/tiny_debug_api.h core/box/tiny_layout_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../hakmem_tiny_config.h core/box/../hakmem_build_flags.h \
|
|
|
|
|
core/box/tiny_header_box.h core/box/tiny_layout_box.h \
|
2025-12-10 09:08:18 +09:00
|
|
|
core/box/../tiny_region_id.h core/hakmem_elo.h core/hakmem_ace_stats.h \
|
|
|
|
|
core/hakmem_batch.h core/hakmem_evo.h core/hakmem_debug.h \
|
|
|
|
|
core/hakmem_prof.h core/hakmem_syscall.h core/hakmem_ace_controller.h \
|
2025-12-03 13:28:44 +09:00
|
|
|
core/hakmem_ace_metrics.h core/hakmem_ace_ucb1.h \
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/bench_fast_box.h core/box/mid_hotbox_v3_box.h \
|
|
|
|
|
core/box/tiny_geometry_box.h \
|
|
|
|
|
core/box/../hakmem_tiny_superslab_internal.h \
|
|
|
|
|
core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_superslab.h \
|
|
|
|
|
core/box/../box/ss_hot_cold_box.h \
|
|
|
|
|
core/box/../box/../superslab/superslab_types.h \
|
|
|
|
|
core/box/../box/ss_allocation_box.h core/hakmem_tiny_superslab.h \
|
|
|
|
|
core/box/../hakmem_debug_master.h core/box/../hakmem_tiny.h \
|
|
|
|
|
core/box/../hakmem_tiny_config.h core/box/../hakmem_shared_pool.h \
|
|
|
|
|
core/box/../superslab/superslab_types.h core/box/../hakmem_internal.h \
|
|
|
|
|
core/box/../tiny_region_id.h core/box/../hakmem_tiny_integrity.h \
|
|
|
|
|
core/box/../box/slab_freelist_atomic.h \
|
|
|
|
|
core/box/../superslab/superslab_inline.h \
|
|
|
|
|
core/box/mid_hotbox_v3_env_box.h core/ptr_trace.h \
|
|
|
|
|
core/hakmem_trace_master.h core/hakmem_stats_master.h \
|
|
|
|
|
core/box/hak_kpi_util.inc.h core/box/hak_core_init.inc.h \
|
|
|
|
|
core/hakmem_phase7_config.h core/box/libm_reloc_guard_box.h \
|
|
|
|
|
core/box/init_bench_preset_box.h core/box/init_diag_box.h \
|
|
|
|
|
core/box/init_env_box.h core/box/../tiny_destructors.h \
|
2025-12-03 20:42:28 +09:00
|
|
|
core/box/ss_hot_prewarm_box.h core/box/hak_alloc_api.inc.h \
|
|
|
|
|
core/box/../hakmem_tiny.h core/box/../hakmem_pool.h \
|
2025-12-07 22:49:28 +09:00
|
|
|
core/box/../hakmem_smallmid.h core/box/tiny_heap_env_box.h \
|
2025-12-07 23:06:50 +09:00
|
|
|
core/box/c7_hotpath_env_box.h core/box/tiny_heap_box.h \
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/../hakmem_tiny_superslab.h core/box/../tiny_tls.h \
|
|
|
|
|
core/box/../tiny_box_geometry.h core/box/tiny_stats_box.h \
|
|
|
|
|
core/box/tiny_c7_hotbox.h core/box/mid_large_config_box.h \
|
|
|
|
|
core/box/../hakmem_config.h core/box/../hakmem_features.h \
|
|
|
|
|
core/box/hak_free_api.inc.h core/box/../hakmem_trace_master.h \
|
2025-12-08 21:30:21 +09:00
|
|
|
core/box/front_gate_v2.h core/box/external_guard_box.h \
|
|
|
|
|
core/box/../hakmem_stats_master.h core/box/ss_slab_meta_box.h \
|
|
|
|
|
core/box/../superslab/superslab_types.h core/box/slab_freelist_atomic.h \
|
|
|
|
|
core/box/fg_tiny_gate_box.h core/box/tiny_free_gate_box.h \
|
|
|
|
|
core/box/ptr_type_box.h core/box/ptr_conversion_box.h \
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/tiny_ptr_bridge_box.h core/box/../tiny_free_fast_v2.inc.h \
|
|
|
|
|
core/box/../box/tls_sll_box.h core/box/../box/../hakmem_internal.h \
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../box/../hakmem_tiny_config.h \
|
2025-11-29 06:57:03 +09:00
|
|
|
core/box/../box/../hakmem_build_flags.h \
|
|
|
|
|
core/box/../box/../hakmem_debug_master.h \
|
|
|
|
|
core/box/../box/../tiny_remote.h core/box/../box/../tiny_region_id.h \
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
core/box/../box/../hakmem_tiny_integrity.h \
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../box/../ptr_track.h core/box/../box/../tiny_debug_ring.h \
|
|
|
|
|
core/box/../box/ss_addr_map_box.h \
|
2025-11-21 23:00:24 +09:00
|
|
|
core/box/../box/../superslab/superslab_inline.h \
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../box/tiny_ptr_bridge_box.h core/box/../box/tiny_header_box.h \
|
|
|
|
|
core/box/../box/tls_sll_drain_box.h core/box/../box/tls_sll_box.h \
|
|
|
|
|
core/box/../box/slab_recycling_box.h \
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../box/../hakmem_tiny_superslab.h \
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../box/ss_hot_cold_box.h core/box/../box/ss_release_guard_box.h \
|
|
|
|
|
core/box/../box/../hakmem_tiny_superslab_internal.h \
|
2025-12-01 16:37:59 +09:00
|
|
|
core/box/../box/free_local_box.h core/box/../box/ptr_type_box.h \
|
|
|
|
|
core/box/../box/free_publish_box.h core/hakmem_tiny.h \
|
2025-12-04 12:55:53 +09:00
|
|
|
core/tiny_region_id.h core/box/../hakmem_env_cache.h \
|
|
|
|
|
core/box/../superslab/superslab_inline.h \
|
|
|
|
|
core/box/../box/ss_slab_meta_box.h core/box/../box/free_remote_box.h \
|
2025-12-03 13:28:44 +09:00
|
|
|
core/hakmem_tiny_integrity.h core/box/../box/ptr_conversion_box.h \
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/free_dispatch_stats_box.h core/box/region_id_v6_box.h \
|
|
|
|
|
core/box/smallsegment_v6_box.h core/box/hak_wrappers.inc.h \
|
Phase FREE-DISPATCHER-OPT-1: free dispatcher 統計計測
**目的**: free dispatcher(29%)の内訳を細分化して計測。
**実装内容**:
- FreeDispatchStats 構造体追加(ENV: HAKMEM_FREE_DISPATCH_STATS, default 0)
- カウンタ: total_calls / domain (tiny/mid/large) / route (ultra/legacy/pool/v6) / env_checks / route_for_class_calls
- hak_free_at / tiny_route_for_class / tiny_route_snapshot_init にカウンタ埋め込み
- 挙動変更なし(計測のみ、ENV OFF 時は overhead ゼロ)
**計測結果**:
Mixed 16-1024B (1M iter, ws=400):
- total=8,081, route_calls=267,967, env_checks=9
- BENCH_FAST_FRONT により大半は早期リターン
- route_for_class は主に alloc 側で呼ばれる(267k calls vs 8k frees)
- ENV check は初期化時の 9回のみ(snapshot 効果)
C6-heavy (257-768B, 1M iter, ws=400):
- total=500,099, route_calls=1,034, env_checks=9
- fg_classify_domain に到達する free が多い
- route_for_class 呼び出しは極小(snapshot 効果)
**結論**:
- ENV check は既に十分最適化されている(初期化時のみ)
- route_for_class は alloc 側での呼び出しが主で、free 側は snapshot で O(1)
- 次フェーズ(OPT-2)では別のアプローチを検討
**ドキュメント追加**:
- docs/analysis/FREE_DISPATCHER_ANALYSIS.md(新規)
- CURRENT_TASK.md に Phase FREE-DISPATCHER-OPT-1 セクション追加
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 21:21:40 +09:00
|
|
|
core/box/front_gate_classifier.h core/box/../front/malloc_tiny_fast.h \
|
2025-11-20 07:32:30 +09:00
|
|
|
core/box/../front/../hakmem_build_flags.h \
|
|
|
|
|
core/box/../front/../hakmem_tiny_config.h \
|
2025-11-28 01:45:45 +09:00
|
|
|
core/box/../front/../superslab/superslab_inline.h \
|
|
|
|
|
core/box/../front/../box/ss_slab_meta_box.h \
|
2025-11-20 07:32:30 +09:00
|
|
|
core/box/../front/tiny_unified_cache.h \
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../front/../box/ptr_type_box.h \
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../front/../box/tiny_front_config_box.h \
|
|
|
|
|
core/box/../front/../box/../hakmem_build_flags.h \
|
|
|
|
|
core/box/../front/../tiny_region_id.h core/box/../front/../hakmem_tiny.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_env_box.h \
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../front/../box/tiny_front_hot_box.h \
|
|
|
|
|
core/box/../front/../box/../hakmem_tiny_config.h \
|
|
|
|
|
core/box/../front/../box/../tiny_region_id.h \
|
|
|
|
|
core/box/../front/../box/../front/tiny_unified_cache.h \
|
|
|
|
|
core/box/../front/../box/tiny_front_cold_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_layout_box.h \
|
2025-12-08 21:30:21 +09:00
|
|
|
core/box/../front/../box/tiny_hotheap_v2_box.h \
|
2025-12-09 21:50:15 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v3_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_geometry_box.h \
|
|
|
|
|
core/box/../front/../box/smallobject_hotbox_v3_env_box.h \
|
2025-12-10 22:57:26 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v4_box.h \
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v5_box.h \
|
2025-12-11 23:51:48 +09:00
|
|
|
core/box/../front/../box/smallobject_core_v6_box.h \
|
|
|
|
|
core/box/../front/../box/smallobject_v6_env_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_route_env_box.h \
|
|
|
|
|
core/box/../front/../box/free_dispatch_stats_box.h \
|
|
|
|
|
core/box/../front/../box/smallobject_hotbox_v4_env_box.h \
|
|
|
|
|
core/box/../front/../box/smallobject_v5_env_box.h \
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v7_box.h \
|
|
|
|
|
core/box/../front/../box/smallsegment_v7_box.h \
|
|
|
|
|
core/box/../front/../box/smallobject_cold_iface_v7_box.h \
|
|
|
|
|
core/box/../front/../box/region_id_v6_box.h \
|
2025-12-12 03:50:58 +09:00
|
|
|
core/box/../front/../box/smallobject_policy_v7_box.h \
|
2025-12-12 07:12:24 +09:00
|
|
|
core/box/../front/../box/smallobject_learner_v7_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_static_route_box.h \
|
|
|
|
|
core/box/../front/../box/smallobject_policy_v7_box.h \
|
2025-12-12 07:12:24 +09:00
|
|
|
core/box/../front/../box/smallobject_mid_v35_box.h \
|
2025-12-10 22:57:26 +09:00
|
|
|
core/box/../front/../box/tiny_c7_ultra_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_c7_ultra_segment_box.h \
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/tiny_c6_ultra_free_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_c6_ultra_free_env_box.h \
|
2025-12-11 19:26:51 +09:00
|
|
|
core/box/../front/../box/tiny_c5_ultra_free_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_c5_ultra_free_env_box.h \
|
2025-12-11 19:38:27 +09:00
|
|
|
core/box/../front/../box/tiny_c4_ultra_free_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_c4_ultra_free_env_box.h \
|
2025-12-12 16:26:42 +09:00
|
|
|
core/box/../front/../box/tiny_ultra_tls_box.h \
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/tiny_ultra_classes_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_legacy_fallback_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/../front/tiny_first_page_cache.h \
|
|
|
|
|
core/box/../front/../box/../front/../hakmem_tiny_config.h \
|
2025-12-09 21:50:15 +09:00
|
|
|
core/box/../front/../box/tiny_front_v3_env_box.h \
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/free_path_stats_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_front_hot_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_metadata_cache_env_box.h \
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/tiny_ptr_convert_box.h \
|
2025-12-08 21:30:21 +09:00
|
|
|
core/box/../front/../box/tiny_front_stats_box.h \
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/free_path_stats_box.h \
|
2025-12-11 21:32:40 +09:00
|
|
|
core/box/../front/../box/alloc_gate_stats_box.h \
|
2025-12-12 18:40:08 +09:00
|
|
|
core/box/../front/../box/free_policy_fast_v2_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/free_tiny_fast_hotcold_env_box.h \
|
|
|
|
|
core/box/../front/../box/free_tiny_fast_hotcold_stats_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_metadata_cache_hot_box.h \
|
|
|
|
|
core/box/../front/../box/tiny_free_route_cache_env_box.h \
|
2025-12-04 18:01:25 +09:00
|
|
|
core/box/tiny_alloc_gate_box.h core/box/tiny_route_box.h \
|
|
|
|
|
core/box/tiny_front_config_box.h core/box/wrapper_env_box.h \
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/wrapper_env_cache_box.h core/box/wrapper_env_cache_env_box.h \
|
2025-12-07 22:49:28 +09:00
|
|
|
core/box/../hakmem_internal.h
|
2025-11-09 18:55:50 +09:00
|
|
|
core/hakmem.h:
|
|
|
|
|
core/hakmem_build_flags.h:
|
|
|
|
|
core/hakmem_config.h:
|
|
|
|
|
core/hakmem_features.h:
|
|
|
|
|
core/hakmem_internal.h:
|
|
|
|
|
core/hakmem_sys.h:
|
|
|
|
|
core/hakmem_whale.h:
|
2025-12-01 16:37:59 +09:00
|
|
|
core/box/ptr_type_box.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/hakmem_bigcache.h:
|
|
|
|
|
core/hakmem_pool.h:
|
2025-12-03 10:34:39 +09:00
|
|
|
core/box/hak_lane_classify.inc.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/hakmem_l25_pool.h:
|
|
|
|
|
core/hakmem_policy.h:
|
|
|
|
|
core/hakmem_learner.h:
|
|
|
|
|
core/hakmem_size_hist.h:
|
|
|
|
|
core/hakmem_ace.h:
|
|
|
|
|
core/hakmem_site_rules.h:
|
|
|
|
|
core/hakmem_tiny.h:
|
|
|
|
|
core/hakmem_trace.h:
|
|
|
|
|
core/hakmem_tiny_mini_mag.h:
|
|
|
|
|
core/hakmem_tiny_superslab.h:
|
|
|
|
|
core/superslab/superslab_types.h:
|
|
|
|
|
core/hakmem_tiny_superslab_constants.h:
|
|
|
|
|
core/superslab/superslab_inline.h:
|
|
|
|
|
core/superslab/superslab_types.h:
|
2025-11-26 12:33:49 +09:00
|
|
|
core/superslab/../tiny_box_geometry.h:
|
|
|
|
|
core/superslab/../hakmem_tiny_superslab_constants.h:
|
|
|
|
|
core/superslab/../hakmem_tiny_config.h:
|
2025-12-10 09:08:18 +09:00
|
|
|
core/superslab/../hakmem_super_registry.h:
|
|
|
|
|
core/superslab/../hakmem_tiny_superslab.h:
|
|
|
|
|
core/superslab/../box/ss_addr_map_box.h:
|
|
|
|
|
core/superslab/../box/../hakmem_build_flags.h:
|
|
|
|
|
core/superslab/../box/super_reg_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/superslab/../box/ss_pt_lookup_box.h:
|
|
|
|
|
core/superslab/../box/ss_pt_types_box.h:
|
|
|
|
|
core/superslab/../box/ss_pt_env_box.h:
|
|
|
|
|
core/superslab/../box/ss_pt_env_box.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/tiny_debug_ring.h:
|
|
|
|
|
core/tiny_remote.h:
|
|
|
|
|
core/hakmem_tiny_superslab_constants.h:
|
|
|
|
|
core/tiny_fastcache.h:
|
2025-12-03 10:34:39 +09:00
|
|
|
core/hakmem_env_cache.h:
|
2025-11-14 01:02:00 +09:00
|
|
|
core/box/tiny_next_ptr_box.h:
|
|
|
|
|
core/hakmem_tiny_config.h:
|
|
|
|
|
core/tiny_nextptr.h:
|
2025-11-21 23:00:24 +09:00
|
|
|
core/tiny_region_id.h:
|
|
|
|
|
core/tiny_box_geometry.h:
|
|
|
|
|
core/ptr_track.h:
|
2025-11-29 06:47:13 +09:00
|
|
|
core/tiny_debug_api.h:
|
2025-12-03 13:28:44 +09:00
|
|
|
core/box/tiny_layout_box.h:
|
|
|
|
|
core/box/../hakmem_tiny_config.h:
|
2025-12-10 09:08:18 +09:00
|
|
|
core/box/../hakmem_build_flags.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/tiny_header_box.h:
|
2025-12-03 13:28:44 +09:00
|
|
|
core/box/tiny_layout_box.h:
|
|
|
|
|
core/box/../tiny_region_id.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/hakmem_elo.h:
|
|
|
|
|
core/hakmem_ace_stats.h:
|
|
|
|
|
core/hakmem_batch.h:
|
|
|
|
|
core/hakmem_evo.h:
|
|
|
|
|
core/hakmem_debug.h:
|
|
|
|
|
core/hakmem_prof.h:
|
|
|
|
|
core/hakmem_syscall.h:
|
|
|
|
|
core/hakmem_ace_controller.h:
|
|
|
|
|
core/hakmem_ace_metrics.h:
|
|
|
|
|
core/hakmem_ace_ucb1.h:
|
2025-11-16 07:51:37 +09:00
|
|
|
core/box/bench_fast_box.h:
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/mid_hotbox_v3_box.h:
|
|
|
|
|
core/box/tiny_geometry_box.h:
|
|
|
|
|
core/box/../hakmem_tiny_superslab_internal.h:
|
|
|
|
|
core/box/../hakmem_build_flags.h:
|
|
|
|
|
core/box/../hakmem_tiny_superslab.h:
|
|
|
|
|
core/box/../box/ss_hot_cold_box.h:
|
|
|
|
|
core/box/../box/../superslab/superslab_types.h:
|
|
|
|
|
core/box/../box/ss_allocation_box.h:
|
|
|
|
|
core/hakmem_tiny_superslab.h:
|
|
|
|
|
core/box/../hakmem_debug_master.h:
|
|
|
|
|
core/box/../hakmem_tiny.h:
|
|
|
|
|
core/box/../hakmem_tiny_config.h:
|
|
|
|
|
core/box/../hakmem_shared_pool.h:
|
|
|
|
|
core/box/../superslab/superslab_types.h:
|
|
|
|
|
core/box/../hakmem_internal.h:
|
|
|
|
|
core/box/../tiny_region_id.h:
|
|
|
|
|
core/box/../hakmem_tiny_integrity.h:
|
|
|
|
|
core/box/../box/slab_freelist_atomic.h:
|
|
|
|
|
core/box/../superslab/superslab_inline.h:
|
|
|
|
|
core/box/mid_hotbox_v3_env_box.h:
|
2025-11-11 00:02:24 +09:00
|
|
|
core/ptr_trace.h:
|
2025-12-04 16:21:54 +09:00
|
|
|
core/hakmem_trace_master.h:
|
|
|
|
|
core/hakmem_stats_master.h:
|
2025-12-03 20:42:28 +09:00
|
|
|
core/box/hak_kpi_util.inc.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/box/hak_core_init.inc.h:
|
|
|
|
|
core/hakmem_phase7_config.h:
|
2025-12-10 09:08:18 +09:00
|
|
|
core/box/libm_reloc_guard_box.h:
|
|
|
|
|
core/box/init_bench_preset_box.h:
|
|
|
|
|
core/box/init_diag_box.h:
|
|
|
|
|
core/box/init_env_box.h:
|
|
|
|
|
core/box/../tiny_destructors.h:
|
2025-11-16 07:51:37 +09:00
|
|
|
core/box/ss_hot_prewarm_box.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/box/hak_alloc_api.inc.h:
|
2025-11-16 02:37:24 +09:00
|
|
|
core/box/../hakmem_tiny.h:
|
2025-12-03 10:34:39 +09:00
|
|
|
core/box/../hakmem_pool.h:
|
2025-11-16 02:37:24 +09:00
|
|
|
core/box/../hakmem_smallmid.h:
|
2025-12-07 22:49:28 +09:00
|
|
|
core/box/tiny_heap_env_box.h:
|
|
|
|
|
core/box/c7_hotpath_env_box.h:
|
|
|
|
|
core/box/tiny_heap_box.h:
|
|
|
|
|
core/box/../hakmem_tiny_superslab.h:
|
|
|
|
|
core/box/../tiny_tls.h:
|
|
|
|
|
core/box/../tiny_box_geometry.h:
|
2025-12-08 21:30:21 +09:00
|
|
|
core/box/tiny_stats_box.h:
|
2025-12-07 23:06:50 +09:00
|
|
|
core/box/tiny_c7_hotbox.h:
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/mid_large_config_box.h:
|
|
|
|
|
core/box/../hakmem_config.h:
|
|
|
|
|
core/box/../hakmem_features.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/box/hak_free_api.inc.h:
|
2025-12-04 16:21:54 +09:00
|
|
|
core/box/../hakmem_trace_master.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/front_gate_v2.h:
|
|
|
|
|
core/box/external_guard_box.h:
|
2025-12-04 16:21:54 +09:00
|
|
|
core/box/../hakmem_stats_master.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/ss_slab_meta_box.h:
|
|
|
|
|
core/box/../superslab/superslab_types.h:
|
|
|
|
|
core/box/slab_freelist_atomic.h:
|
|
|
|
|
core/box/fg_tiny_gate_box.h:
|
|
|
|
|
core/box/tiny_free_gate_box.h:
|
|
|
|
|
core/box/ptr_type_box.h:
|
|
|
|
|
core/box/ptr_conversion_box.h:
|
|
|
|
|
core/box/tiny_ptr_bridge_box.h:
|
|
|
|
|
core/box/../tiny_free_fast_v2.inc.h:
|
2025-11-10 16:48:20 +09:00
|
|
|
core/box/../box/tls_sll_box.h:
|
2025-12-01 16:37:59 +09:00
|
|
|
core/box/../box/../hakmem_internal.h:
|
2025-11-10 16:48:20 +09:00
|
|
|
core/box/../box/../hakmem_tiny_config.h:
|
2025-11-11 00:02:24 +09:00
|
|
|
core/box/../box/../hakmem_build_flags.h:
|
2025-11-29 06:57:03 +09:00
|
|
|
core/box/../box/../hakmem_debug_master.h:
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
core/box/../box/../tiny_remote.h:
|
2025-11-11 10:00:36 +09:00
|
|
|
core/box/../box/../tiny_region_id.h:
|
Add Box I (Integrity), Box E (Expansion), and comprehensive P0 debugging infrastructure
## Major Additions
### 1. Box I: Integrity Verification System (NEW - 703 lines)
- Files: core/box/integrity_box.h (267 lines), core/box/integrity_box.c (436 lines)
- Purpose: Unified integrity checking across all HAKMEM subsystems
- Features:
* 4-level integrity checking (0-4, compile-time controlled)
* Priority 1: TLS array bounds validation
* Priority 2: Freelist pointer validation
* Priority 3: TLS canary monitoring
* Priority ALPHA: Slab metadata invariant checking (5 invariants)
* Atomic statistics tracking (thread-safe)
* Beautiful BOX_BOUNDARY design pattern
### 2. Box E: SuperSlab Expansion System (COMPLETE)
- Files: core/box/superslab_expansion_box.h, core/box/superslab_expansion_box.c
- Purpose: Safe SuperSlab expansion with TLS state guarantee
- Features:
* Immediate slab 0 binding after expansion
* TLS state snapshot and restoration
* Design by Contract (pre/post-conditions, invariants)
* Thread-safe with mutex protection
### 3. Comprehensive Integrity Checking System
- File: core/hakmem_tiny_integrity.h (NEW)
- Unified validation functions for all allocator subsystems
- Uninitialized memory pattern detection (0xa2, 0xcc, 0xdd, 0xfe)
- Pointer range validation (null-page, kernel-space)
### 4. P0 Bug Investigation - Root Cause Identified
**Bug**: SEGV at iteration 28440 (deterministic with seed 42)
**Pattern**: 0xa2a2a2a2a2a2a2a2 (uninitialized/ASan poisoning)
**Location**: TLS SLL (Single-Linked List) cache layer
**Root Cause**: Race condition or use-after-free in TLS list management (class 0)
**Detection**: Box I successfully caught invalid pointer at exact crash point
### 5. Defensive Improvements
- Defensive memset in SuperSlab allocation (all metadata arrays)
- Enhanced pointer validation with pattern detection
- BOX_BOUNDARY markers throughout codebase (beautiful modular design)
- 5 metadata invariant checks in allocation/free/refill paths
## Integration Points
- Modified 13 files with Box I/E integration
- Added 10+ BOX_BOUNDARY markers
- 5 critical integrity check points in P0 refill path
## Test Results (100K iterations)
- Baseline: 7.22M ops/s
- Hotpath ON: 8.98M ops/s (+24% improvement ✓)
- P0 Bug: Still crashes at 28440 iterations (TLS SLL race condition)
- Root cause: Identified but not yet fixed (requires deeper investigation)
## Performance
- Box I overhead: Zero in release builds (HAKMEM_INTEGRITY_LEVEL=0)
- Debug builds: Full validation enabled (HAKMEM_INTEGRITY_LEVEL=4)
- Beautiful modular design maintains clean separation of concerns
## Known Issues
- P0 Bug at 28440 iterations: Race condition in TLS SLL cache (class 0)
- Cause: Use-after-free or race in remote free draining
- Next step: Valgrind investigation to pinpoint exact corruption location
## Code Quality
- Total new code: ~1400 lines (Box I + Box E + integrity system)
- Design: Beautiful Box Theory with clear boundaries
- Modularity: Complete separation of concerns
- Documentation: Comprehensive inline comments and BOX_BOUNDARY markers
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 02:45:00 +09:00
|
|
|
core/box/../box/../hakmem_tiny_integrity.h:
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
ℹ️ Final log: "tiny_alloc(1024) failed" is a normal fallback to the Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
core/box/../box/../ptr_track.h:
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (also affected by debug logs)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
core/box/../box/../tiny_debug_ring.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../box/ss_addr_map_box.h:
|
2025-11-21 23:00:24 +09:00
|
|
|
core/box/../box/../superslab/superslab_inline.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../box/tiny_ptr_bridge_box.h:
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../box/tiny_header_box.h:
|
2025-11-14 07:10:46 +09:00
|
|
|
core/box/../box/tls_sll_drain_box.h:
|
|
|
|
|
core/box/../box/tls_sll_box.h:
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../box/slab_recycling_box.h:
|
|
|
|
|
core/box/../box/../hakmem_tiny_superslab.h:
|
|
|
|
|
core/box/../box/ss_hot_cold_box.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../box/ss_release_guard_box.h:
|
|
|
|
|
core/box/../box/../hakmem_tiny_superslab_internal.h:
|
2025-11-14 07:10:46 +09:00
|
|
|
core/box/../box/free_local_box.h:
|
2025-12-01 16:37:59 +09:00
|
|
|
core/box/../box/ptr_type_box.h:
|
|
|
|
|
core/box/../box/free_publish_box.h:
|
|
|
|
|
core/hakmem_tiny.h:
|
|
|
|
|
core/tiny_region_id.h:
|
2025-12-03 10:34:39 +09:00
|
|
|
core/box/../hakmem_env_cache.h:
|
Phase 23 Unified Cache + PageFaultTelemetry generalization: Mid/VM page-fault bottleneck identified
Summary:
- Phase 23 Unified Cache: +30% improvement (Random Mixed 256B: 18.18M → 23.68M ops/s)
- PageFaultTelemetry: Extended to generic buckets (C0-C7, MID, L25, SSM)
- Measurement-driven decision: Mid/VM page-faults (80-100K) >> Tiny (6K) → prioritize Mid/VM optimization
Phase 23 Changes:
1. Unified Cache implementation (core/front/tiny_unified_cache.{c,h})
- Direct SuperSlab carve (TLS SLL bypass)
- Self-contained pop-or-refill pattern
- ENV: HAKMEM_TINY_UNIFIED_CACHE=1, HAKMEM_TINY_UNIFIED_C{0-7}=128
2. Fast path pruning (tiny_alloc_fast.inc.h, tiny_free_fast_v2.inc.h)
- Unified ON → direct cache access (skip all intermediate layers)
- Alloc: unified_cache_pop_or_refill() → immediate fail to slow
- Free: unified_cache_push() → fallback to SLL only if full
PageFaultTelemetry Changes:
3. Generic bucket architecture (core/box/pagefault_telemetry_box.{c,h})
- PF_BUCKET_{C0-C7, MID, L25, SSM} for domain-specific measurement
- Integration: hak_pool_try_alloc(), l25_alloc_new_run(), shared_pool_allocate_superslab_unlocked()
4. Measurement results (Random Mixed 500K / 256B):
- Tiny C2-C7: 2-33 pages, high reuse (64-3.8 touches/page)
- SSM: 512 pages (initialization footprint)
- MID/L25: 0 (unused in this workload)
- Mid/Large VM benchmarks: 80-100K page-faults (13-16x higher than Tiny)
Ring Cache Enhancements:
5. Hot Ring Cache (core/front/tiny_ring_cache.{c,h})
- ENV: HAKMEM_TINY_HOT_RING_ENABLE=1, HAKMEM_TINY_HOT_RING_C{0-7}=size
- Conditional compilation cleanup
Documentation:
6. Analysis reports
- RANDOM_MIXED_BOTTLENECK_ANALYSIS.md: Page-fault breakdown
- RANDOM_MIXED_SUMMARY.md: Phase 23 summary
- RING_CACHE_ACTIVATION_GUIDE.md: Ring cache usage
- CURRENT_TASK.md: Updated with Phase 23 results and Phase 24 plan
Next Steps (Phase 24):
- Target: Mid/VM PageArena/HotSpanBox (page-fault reduction 80-100K → 30-40K)
- Tiny SSM optimization deferred (low ROI, ~6K page-faults already optimal)
- Expected improvement: +30-50% for Mid/Large workloads
Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 02:47:58 +09:00
|
|
|
core/box/../superslab/superslab_inline.h:
|
2025-11-20 07:32:30 +09:00
|
|
|
core/box/../box/ss_slab_meta_box.h:
|
Phase 23 Unified Cache + PageFaultTelemetry generalization: Mid/VM page-fault bottleneck identified
Summary:
- Phase 23 Unified Cache: +30% improvement (Random Mixed 256B: 18.18M → 23.68M ops/s)
- PageFaultTelemetry: Extended to generic buckets (C0-C7, MID, L25, SSM)
- Measurement-driven decision: Mid/VM page-faults (80-100K) >> Tiny (6K) → prioritize Mid/VM optimization
Phase 23 Changes:
1. Unified Cache implementation (core/front/tiny_unified_cache.{c,h})
- Direct SuperSlab carve (TLS SLL bypass)
- Self-contained pop-or-refill pattern
- ENV: HAKMEM_TINY_UNIFIED_CACHE=1, HAKMEM_TINY_UNIFIED_C{0-7}=128
2. Fast path pruning (tiny_alloc_fast.inc.h, tiny_free_fast_v2.inc.h)
- Unified ON → direct cache access (skip all intermediate layers)
- Alloc: unified_cache_pop_or_refill() → immediate fail to slow
- Free: unified_cache_push() → fallback to SLL only if full
PageFaultTelemetry Changes:
3. Generic bucket architecture (core/box/pagefault_telemetry_box.{c,h})
- PF_BUCKET_{C0-C7, MID, L25, SSM} for domain-specific measurement
- Integration: hak_pool_try_alloc(), l25_alloc_new_run(), shared_pool_allocate_superslab_unlocked()
4. Measurement results (Random Mixed 500K / 256B):
- Tiny C2-C7: 2-33 pages, high reuse (64-3.8 touches/page)
- SSM: 512 pages (initialization footprint)
- MID/L25: 0 (unused in this workload)
- Mid/Large VM benchmarks: 80-100K page-faults (13-16x higher than Tiny)
Ring Cache Enhancements:
5. Hot Ring Cache (core/front/tiny_ring_cache.{c,h})
- ENV: HAKMEM_TINY_HOT_RING_ENABLE=1, HAKMEM_TINY_HOT_RING_C{0-7}=size
- Conditional compilation cleanup
Documentation:
6. Analysis reports
- RANDOM_MIXED_BOTTLENECK_ANALYSIS.md: Page-fault breakdown
- RANDOM_MIXED_SUMMARY.md: Phase 23 summary
- RING_CACHE_ACTIVATION_GUIDE.md: Ring cache usage
- CURRENT_TASK.md: Updated with Phase 23 results and Phase 24 plan
Next Steps (Phase 24):
- Target: Mid/VM PageArena/HotSpanBox (page-fault reduction 80-100K → 30-40K)
- Tiny SSM optimization deferred (low ROI, ~6K page-faults already optimal)
- Expected improvement: +30-50% for Mid/Large workloads
Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 02:47:58 +09:00
|
|
|
core/box/../box/free_remote_box.h:
|
2025-12-01 16:37:59 +09:00
|
|
|
core/hakmem_tiny_integrity.h:
|
2025-12-03 13:28:44 +09:00
|
|
|
core/box/../box/ptr_conversion_box.h:
|
Phase FREE-DISPATCHER-OPT-1: free dispatcher 統計計測
**目的**: free dispatcher(29%)の内訳を細分化して計測。
**実装内容**:
- FreeDispatchStats 構造体追加(ENV: HAKMEM_FREE_DISPATCH_STATS, default 0)
- カウンタ: total_calls / domain (tiny/mid/large) / route (ultra/legacy/pool/v6) / env_checks / route_for_class_calls
- hak_free_at / tiny_route_for_class / tiny_route_snapshot_init にカウンタ埋め込み
- 挙動変更なし(計測のみ、ENV OFF 時は overhead ゼロ)
**計測結果**:
Mixed 16-1024B (1M iter, ws=400):
- total=8,081, route_calls=267,967, env_checks=9
- BENCH_FAST_FRONT により大半は早期リターン
- route_for_class は主に alloc 側で呼ばれる(267k calls vs 8k frees)
- ENV check は初期化時の 9回のみ(snapshot 効果)
C6-heavy (257-768B, 1M iter, ws=400):
- total=500,099, route_calls=1,034, env_checks=9
- fg_classify_domain に到達する free が多い
- route_for_class 呼び出しは極小(snapshot 効果)
**結論**:
- ENV check は既に十分最適化されている(初期化時のみ)
- route_for_class は alloc 側での呼び出しが主で、free 側は snapshot で O(1)
- 次フェーズ(OPT-2)では別のアプローチを検討
**ドキュメント追加**:
- docs/analysis/FREE_DISPATCHER_ANALYSIS.md(新規)
- CURRENT_TASK.md に Phase FREE-DISPATCHER-OPT-1 セクション追加
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 21:21:40 +09:00
|
|
|
core/box/free_dispatch_stats_box.h:
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/region_id_v6_box.h:
|
|
|
|
|
core/box/smallsegment_v6_box.h:
|
2025-11-09 18:55:50 +09:00
|
|
|
core/box/hak_wrappers.inc.h:
|
2025-11-15 23:00:21 +09:00
|
|
|
core/box/front_gate_classifier.h:
|
2025-11-20 07:32:30 +09:00
|
|
|
core/box/../front/malloc_tiny_fast.h:
|
|
|
|
|
core/box/../front/../hakmem_build_flags.h:
|
|
|
|
|
core/box/../front/../hakmem_tiny_config.h:
|
2025-11-28 01:45:45 +09:00
|
|
|
core/box/../front/../superslab/superslab_inline.h:
|
|
|
|
|
core/box/../front/../box/ss_slab_meta_box.h:
|
2025-11-20 07:32:30 +09:00
|
|
|
core/box/../front/tiny_unified_cache.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/../front/../box/ptr_type_box.h:
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../front/../box/tiny_front_config_box.h:
|
|
|
|
|
core/box/../front/../box/../hakmem_build_flags.h:
|
2025-11-20 07:32:30 +09:00
|
|
|
core/box/../front/../tiny_region_id.h:
|
|
|
|
|
core/box/../front/../hakmem_tiny.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_env_box.h:
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/../front/../box/tiny_front_hot_box.h:
|
|
|
|
|
core/box/../front/../box/../hakmem_tiny_config.h:
|
|
|
|
|
core/box/../front/../box/../tiny_region_id.h:
|
|
|
|
|
core/box/../front/../box/../front/tiny_unified_cache.h:
|
|
|
|
|
core/box/../front/../box/tiny_front_cold_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_layout_box.h:
|
2025-12-08 21:30:21 +09:00
|
|
|
core/box/../front/../box/tiny_hotheap_v2_box.h:
|
2025-12-09 21:50:15 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v3_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_geometry_box.h:
|
|
|
|
|
core/box/../front/../box/smallobject_hotbox_v3_env_box.h:
|
2025-12-10 22:57:26 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v4_box.h:
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skips page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v5_box.h:
|
2025-12-11 23:51:48 +09:00
|
|
|
core/box/../front/../box/smallobject_core_v6_box.h:
|
|
|
|
|
core/box/../front/../box/smallobject_v6_env_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_route_env_box.h:
|
|
|
|
|
core/box/../front/../box/free_dispatch_stats_box.h:
|
|
|
|
|
core/box/../front/../box/smallobject_hotbox_v4_env_box.h:
|
|
|
|
|
core/box/../front/../box/smallobject_v5_env_box.h:
|
2025-12-12 03:12:28 +09:00
|
|
|
core/box/../front/../box/smallobject_hotbox_v7_box.h:
|
|
|
|
|
core/box/../front/../box/smallsegment_v7_box.h:
|
|
|
|
|
core/box/../front/../box/smallobject_cold_iface_v7_box.h:
|
|
|
|
|
core/box/../front/../box/region_id_v6_box.h:
|
2025-12-12 03:50:58 +09:00
|
|
|
core/box/../front/../box/smallobject_policy_v7_box.h:
|
2025-12-12 07:12:24 +09:00
|
|
|
core/box/../front/../box/smallobject_learner_v7_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_static_route_box.h:
|
|
|
|
|
core/box/../front/../box/smallobject_policy_v7_box.h:
|
2025-12-12 07:12:24 +09:00
|
|
|
core/box/../front/../box/smallobject_mid_v35_box.h:
|
2025-12-10 22:57:26 +09:00
|
|
|
core/box/../front/../box/tiny_c7_ultra_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_c7_ultra_segment_box.h:
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% fewer branch mispredictions)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/tiny_c6_ultra_free_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_c6_ultra_free_env_box.h:
|
2025-12-11 19:26:51 +09:00
|
|
|
core/box/../front/../box/tiny_c5_ultra_free_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_c5_ultra_free_env_box.h:
|
2025-12-11 19:38:27 +09:00
|
|
|
core/box/../front/../box/tiny_c4_ultra_free_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_c4_ultra_free_env_box.h:
|
2025-12-12 16:26:42 +09:00
|
|
|
core/box/../front/../box/tiny_ultra_tls_box.h:
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% fewer branch mispredictions)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/tiny_ultra_classes_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_legacy_fallback_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/../front/tiny_first_page_cache.h:
|
|
|
|
|
core/box/../front/../box/../front/../hakmem_tiny_config.h:
|
2025-12-09 21:50:15 +09:00
|
|
|
core/box/../front/../box/tiny_front_v3_env_box.h:
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% fewer branch mispredictions)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/free_path_stats_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_front_hot_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/tiny_metadata_cache_env_box.h:
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/tiny_ptr_convert_box.h:
|
2025-12-08 21:30:21 +09:00
|
|
|
core/box/../front/../box/tiny_front_stats_box.h:
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
core/box/../front/../box/free_path_stats_box.h:
|
2025-12-11 21:32:40 +09:00
|
|
|
core/box/../front/../box/alloc_gate_stats_box.h:
|
2025-12-12 18:40:08 +09:00
|
|
|
core/box/../front/../box/free_policy_fast_v2_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/../front/../box/free_tiny_fast_hotcold_env_box.h:
|
|
|
|
|
core/box/../front/../box/free_tiny_fast_hotcold_stats_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_metadata_cache_hot_box.h:
|
|
|
|
|
core/box/../front/../box/tiny_free_route_cache_env_box.h:
|
2025-12-04 12:55:53 +09:00
|
|
|
core/box/tiny_alloc_gate_box.h:
|
2025-12-04 18:01:25 +09:00
|
|
|
core/box/tiny_route_box.h:
|
2025-11-30 15:27:53 +09:00
|
|
|
core/box/tiny_front_config_box.h:
|
2025-12-03 10:34:39 +09:00
|
|
|
core/box/wrapper_env_box.h:
|
Phase 3 D2: Wrapper Env Cache - [DECISION: NO-GO]
Target: Reduce wrapper_env_cfg() overhead in malloc/free hot path
- Strategy: Cache wrapper env configuration pointer in TLS
- Approach: Fast pointer cache (TLS caches const wrapper_env_cfg_t*)
Implementation:
- core/box/wrapper_env_cache_env_box.h: ENV gate (HAKMEM_WRAP_ENV_CACHE)
- core/box/wrapper_env_cache_box.h: TLS cache layer (wrapper_env_cfg_fast)
- core/box/hak_wrappers.inc.h: Integration into malloc/free hot paths
- ENV gate: HAKMEM_WRAP_ENV_CACHE=0/1 (default OFF)
A/B Test Results (Mixed, 10-run, 20M iters):
- Baseline (D2=0): 46.52M ops/s (avg), 46.47M ops/s (median)
- Optimized (D2=1): 45.85M ops/s (avg), 45.98M ops/s (median)
- Improvement: avg -1.44%, median -1.05% (DECISION: NO-GO)
Analysis:
- Regression cause: TLS cache adds overhead (branch + TLS access)
- wrapper_env_cfg() is already minimal (pointer return after simple check)
- Adding TLS caching layer makes it worse, not better
- Branch prediction penalty outweighs any potential savings
Cumulative Phase 2-3:
- B3: +2.89%, B4: +1.47%, C3: +2.20%
- D1: +1.06% (opt-in), D2: -1.44% (NO-GO)
- Total: ~7.2% (excluding D2)
Decision: FREEZE as research box (default OFF, regression confirmed)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 22:03:27 +09:00
|
|
|
core/box/wrapper_env_cache_box.h:
|
|
|
|
|
core/box/wrapper_env_cache_env_box.h:
|
2025-12-03 13:28:44 +09:00
|
|
|
core/box/../hakmem_internal.h:
|