Phase 12: Shared SuperSlab Pool implementation (WIP - runtime crash)
## Summary
Implemented Phase 12 Shared SuperSlab Pool (mimalloc-style) to address
SuperSlab allocation churn (877 SuperSlabs → 100-200 target).
## Implementation (ChatGPT + Claude)
1. **Metadata changes** (superslab_types.h):
- Added class_idx to TinySlabMeta (per-slab dynamic class)
- Removed size_class from SuperSlab (no longer per-SuperSlab)
- Changed owner_tid (16-bit) → owner_tid_low (8-bit)
2. **Shared Pool** (hakmem_shared_pool.{h,c}):
- Global pool shared by all size classes
- shared_pool_acquire_slab() - Get free slab for class_idx
- shared_pool_release_slab() - Return slab when empty
- Per-class hints for fast path optimization
3. **Integration** (23 files modified):
- Updated all ss->size_class → meta->class_idx
- Updated all meta->owner_tid → meta->owner_tid_low
- superslab_refill() now uses shared pool
- Free path releases empty slabs back to pool
4. **Build system** (Makefile):
- Added hakmem_shared_pool.o to OBJS_BASE and TINY_BENCH_OBJS_BASE
## Status: ⚠️ Build OK, Runtime CRASH
**Build**: ✅ SUCCESS
- All 23 files compile without errors
- Only warnings: superslab_allocate type mismatch (legacy code)
**Runtime**: ❌ SEGFAULT
- Crash location: sll_refill_small_from_ss()
- Exit code: 139 (SIGSEGV)
- Test case: ./bench_random_mixed_hakmem 1000 256 42
## Known Issues
1. **SEGFAULT in refill path** - Likely shared_pool_acquire_slab() issue
2. **Legacy superslab_allocate()** still exists (type mismatch warning)
3. **Remaining TODOs** from design doc:
- SuperSlab physical layout integration
- slab_handle.h cleanup
- Remove old per-class head implementation
## Next Steps
1. Debug SEGFAULT (gdb backtrace shows sll_refill_small_from_ss)
2. Fix shared_pool_acquire_slab() or superslab_init_slab()
3. Basic functionality test (1K → 100K iterations)
4. Measure SuperSlab count reduction (877 → 100-200)
5. Performance benchmark (+650-860% expected)
## Files Changed (25 files)
core/box/free_local_box.c
core/box/free_remote_box.c
core/box/front_gate_classifier.c
core/hakmem_super_registry.c
core/hakmem_tiny.c
core/hakmem_tiny_bg_spill.c
core/hakmem_tiny_free.inc
core/hakmem_tiny_lifecycle.inc
core/hakmem_tiny_magazine.c
core/hakmem_tiny_query.c
core/hakmem_tiny_refill.inc.h
core/hakmem_tiny_superslab.c
core/hakmem_tiny_superslab.h
core/hakmem_tiny_tls_ops.h
core/slab_handle.h
core/superslab/superslab_inline.h
core/superslab/superslab_types.h
core/tiny_debug.h
core/tiny_free_fast.inc.h
core/tiny_free_magazine.inc.h
core/tiny_remote.c
core/tiny_superslab_alloc.inc.h
core/tiny_superslab_free.inc.h
Makefile
## New Files (3 files)
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
core/hakmem_shared_pool.c
core/hakmem_shared_pool.h
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
This commit is contained in:
4
Makefile
4
Makefile
@ -179,7 +179,7 @@ LDFLAGS += $(EXTRA_LDFLAGS)
|
||||
|
||||
# Targets
|
||||
TARGET = test_hakmem
|
||||
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/link_stubs.o test_hakmem.o
|
||||
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/link_stubs.o test_hakmem.o
|
||||
OBJS = $(OBJS_BASE)
|
||||
|
||||
# Shared library
|
||||
@ -380,7 +380,7 @@ test-box-refactor: box-refactor
|
||||
./larson_hakmem 10 8 128 1024 1 12345 4
|
||||
|
||||
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
|
||||
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o
|
||||
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o
|
||||
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
|
||||
ifeq ($(POOL_TLS_PHASE1),1)
|
||||
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
||||
|
||||
423
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
Normal file
423
PHASE12_SHARED_SUPERSLAB_POOL_DESIGN.md
Normal file
@ -0,0 +1,423 @@
|
||||
# Phase 12: Shared SuperSlab Pool - Design Document
|
||||
|
||||
**Date**: 2025-11-13
|
||||
**Goal**: System malloc parity (90M ops/s) via mimalloc-style shared SuperSlab architecture
|
||||
**Expected Impact**: SuperSlab count 877 → 100-200 (-70-80%), +650-860% performance
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Problem Statement
|
||||
|
||||
### Root Cause: Fixed Size Class Architecture
|
||||
|
||||
**Current Design** (Phase 11):
|
||||
```c
|
||||
// SuperSlab is bound to ONE size class
|
||||
struct SuperSlab {
|
||||
uint8_t size_class; // FIXED at allocation time (0-7)
|
||||
// ... 32 slabs, all for the SAME class
|
||||
};
|
||||
|
||||
// 8 independent SuperSlabHead structures (one per class)
|
||||
SuperSlabHead g_superslab_heads[8]; // Each class manages its own pool
|
||||
```
|
||||
|
||||
**Problem**:
|
||||
- Benchmark (100K iterations, 256B): **877 SuperSlabs allocated**
|
||||
- Memory usage: 877MB (877 × 1MB SuperSlabs)
|
||||
- Metadata overhead: 877 × ~2KB headers = ~1.8MB
|
||||
- **Each size class independently allocates SuperSlabs** → massive churn
|
||||
|
||||
**Why 877?**:
|
||||
```
|
||||
Class 0 (8B): ~100 SuperSlabs
|
||||
Class 1 (16B): ~120 SuperSlabs
|
||||
Class 2 (32B): ~150 SuperSlabs
|
||||
Class 3 (64B): ~180 SuperSlabs
|
||||
Class 4 (128B): ~140 SuperSlabs
|
||||
Class 5 (256B): ~187 SuperSlabs ← Target class for benchmark
|
||||
Class 6 (512B): ~80 SuperSlabs
|
||||
Class 7 (1KB): ~20 SuperSlabs
|
||||
Total: 877 SuperSlabs
|
||||
```
|
||||
|
||||
**Performance Impact**:
|
||||
- Massive metadata traversal overhead
|
||||
- Poor cache locality (877 scattered 1MB regions)
|
||||
- Excessive TLB pressure
|
||||
- SuperSlab allocation churn dominates runtime
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Solution: Shared SuperSlab Pool (mimalloc-style)
|
||||
|
||||
### Core Concept
|
||||
|
||||
**New Design** (Phase 12):
|
||||
```c
|
||||
// SuperSlab is NOT bound to any class - slabs are dynamically assigned
|
||||
struct SuperSlab {
|
||||
// NO size_class field! Each slab has its own class_idx
|
||||
uint8_t active_slabs; // Number of active slabs (any class)
|
||||
uint32_t slab_bitmap; // 32-bit bitmap (1=active, 0=free)
|
||||
// ... 32 slabs, EACH can be a different size class
|
||||
};
|
||||
|
||||
// Single global pool (shared by all classes)
|
||||
typedef struct SharedSuperSlabPool {
|
||||
SuperSlab** slabs; // Array of all SuperSlabs
|
||||
uint32_t total_count; // Total SuperSlabs allocated
|
||||
uint32_t active_count; // SuperSlabs with active slabs
|
||||
pthread_mutex_t lock; // Allocation lock
|
||||
|
||||
// Per-class hints (fast path optimization)
|
||||
SuperSlab* class_hints[8]; // Last known SuperSlab with free space per class
|
||||
} SharedSuperSlabPool;
|
||||
```
|
||||
|
||||
### Per-Slab Dynamic Class Assignment
|
||||
|
||||
**Old** (TinySlabMeta):
|
||||
```c
|
||||
// Slab metadata (16 bytes) - class_idx inherited from SuperSlab
|
||||
typedef struct TinySlabMeta {
|
||||
void* freelist;
|
||||
uint16_t used;
|
||||
uint16_t capacity;
|
||||
uint16_t carved;
|
||||
uint16_t owner_tid;
|
||||
} TinySlabMeta;
|
||||
```
|
||||
|
||||
**New** (Phase 12):
|
||||
```c
|
||||
// Slab metadata (16 bytes) - class_idx is PER-SLAB
|
||||
typedef struct TinySlabMeta {
|
||||
void* freelist;
|
||||
uint16_t used;
|
||||
uint16_t capacity;
|
||||
uint16_t carved;
|
||||
uint8_t class_idx; // NEW: Dynamic class assignment (0-7, 255=unassigned)
|
||||
uint8_t owner_tid_low; // Truncated to 8-bit (from 16-bit)
|
||||
} TinySlabMeta;
|
||||
```
|
||||
|
||||
**Size preserved**: Still 16 bytes (no growth!)
|
||||
|
||||
---
|
||||
|
||||
## 📐 Architecture Changes
|
||||
|
||||
### 1. SuperSlab Structure (superslab_types.h)
|
||||
|
||||
**Remove**:
|
||||
```c
|
||||
uint8_t size_class; // DELETE - no longer per-SuperSlab
|
||||
```
|
||||
|
||||
**Add** (optional, for debugging):
|
||||
```c
|
||||
uint8_t mixed_slab_count; // Number of slabs with different class_idx (stats)
|
||||
```
|
||||
|
||||
### 2. TinySlabMeta Structure (superslab_types.h)
|
||||
|
||||
**Modify**:
|
||||
```c
|
||||
typedef struct TinySlabMeta {
|
||||
void* freelist;
|
||||
uint16_t used;
|
||||
uint16_t capacity;
|
||||
uint16_t carved;
|
||||
uint8_t class_idx; // NEW: 0-7 for active, 255=unassigned
|
||||
uint8_t owner_tid_low; // Changed from uint16_t owner_tid
|
||||
} TinySlabMeta;
|
||||
```
|
||||
|
||||
### 3. Shared Pool Structure (NEW: hakmem_shared_pool.h)
|
||||
|
||||
```c
|
||||
// Global shared pool (singleton)
|
||||
typedef struct SharedSuperSlabPool {
|
||||
SuperSlab** slabs; // Dynamic array of SuperSlab pointers
|
||||
uint32_t capacity; // Array capacity (grows as needed)
|
||||
uint32_t total_count; // Total SuperSlabs allocated
|
||||
uint32_t active_count; // SuperSlabs with >0 active slabs
|
||||
|
||||
pthread_mutex_t alloc_lock; // Lock for slab allocation
|
||||
|
||||
// Per-class hints (lock-free read, updated under lock)
|
||||
SuperSlab* class_hints[TINY_NUM_CLASSES];
|
||||
|
||||
// LRU cache integration (Phase 9)
|
||||
SuperSlab* lru_head;
|
||||
SuperSlab* lru_tail;
|
||||
uint32_t lru_count;
|
||||
} SharedSuperSlabPool;
|
||||
|
||||
// Global singleton
|
||||
extern SharedSuperSlabPool g_shared_pool;
|
||||
|
||||
// API
|
||||
void shared_pool_init(void);
|
||||
SuperSlab* shared_pool_acquire_superslab(void); // Get/allocate SuperSlab
|
||||
int shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out);
|
||||
void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
|
||||
```
|
||||
|
||||
### 4. Allocation Flow (NEW)
|
||||
|
||||
**Old Flow** (Phase 11):
|
||||
```
|
||||
1. TLS cache miss for class C
|
||||
2. Check g_superslab_heads[C].current_chunk
|
||||
3. If no space → allocate NEW SuperSlab for class C
|
||||
4. All 32 slabs in new SuperSlab belong to class C
|
||||
```
|
||||
|
||||
**New Flow** (Phase 12):
|
||||
```
|
||||
1. TLS cache miss for class C
|
||||
2. Check g_shared_pool.class_hints[C]
|
||||
3. If hint has free slab → assign that slab to class C (set class_idx=C)
|
||||
4. If no hint:
|
||||
a. Scan g_shared_pool.slabs[] for any SuperSlab with free slab
|
||||
b. If found → assign slab to class C
|
||||
c. If not found → allocate NEW SuperSlab (add to pool)
|
||||
5. Update class_hints[C] for fast path
|
||||
```
|
||||
|
||||
**Key Benefit**: NEW SuperSlab only allocated when ALL existing SuperSlabs are full!
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Implementation Plan
|
||||
|
||||
### Phase 12-1: Dynamic Slab Metadata ✅ (Current Task)
|
||||
|
||||
**Files to modify**:
|
||||
- `core/superslab/superslab_types.h` - Add `class_idx` to TinySlabMeta
|
||||
- `core/superslab/superslab_types.h` - Remove `size_class` from SuperSlab
|
||||
|
||||
**Changes**:
|
||||
```c
|
||||
// TinySlabMeta: Add class_idx field
|
||||
typedef struct TinySlabMeta {
|
||||
void* freelist;
|
||||
uint16_t used;
|
||||
uint16_t capacity;
|
||||
uint16_t carved;
|
||||
uint8_t class_idx; // NEW: 0-7 for active, 255=UNASSIGNED
|
||||
uint8_t owner_tid_low; // Changed from uint16_t
|
||||
} TinySlabMeta;
|
||||
|
||||
// SuperSlab: Remove size_class
|
||||
typedef struct SuperSlab {
|
||||
uint64_t magic;
|
||||
// uint8_t size_class; // REMOVED!
|
||||
uint8_t active_slabs;
|
||||
uint8_t lg_size;
|
||||
uint8_t _pad0;
|
||||
// ... rest unchanged
|
||||
} SuperSlab;
|
||||
```
|
||||
|
||||
**Compatibility shim** (temporary, for gradual migration):
|
||||
```c
|
||||
// Provide backward-compatible size_class accessor
|
||||
static inline int superslab_get_class(SuperSlab* ss, int slab_idx) {
|
||||
return ss->slabs[slab_idx].class_idx;
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 12-2: Shared Pool Infrastructure
|
||||
|
||||
**New file**: `core/hakmem_shared_pool.h`, `core/hakmem_shared_pool.c`
|
||||
|
||||
**Functionality**:
|
||||
- `shared_pool_init()` - Initialize global pool
|
||||
- `shared_pool_acquire_slab()` - Get free slab for class_idx
|
||||
- `shared_pool_release_slab()` - Mark slab as free (class_idx=255)
|
||||
- `shared_pool_gc()` - Garbage collect empty SuperSlabs
|
||||
|
||||
**Data structure**:
|
||||
```c
|
||||
// Global pool (singleton)
|
||||
SharedSuperSlabPool g_shared_pool = {
|
||||
.slabs = NULL,
|
||||
.capacity = 0,
|
||||
.total_count = 0,
|
||||
.active_count = 0,
|
||||
.alloc_lock = PTHREAD_MUTEX_INITIALIZER,
|
||||
.class_hints = {NULL},
|
||||
.lru_head = NULL,
|
||||
.lru_tail = NULL,
|
||||
.lru_count = 0
|
||||
};
|
||||
```
|
||||
|
||||
### Phase 12-3: Refill Path Integration
|
||||
|
||||
**Files to modify**:
|
||||
- `core/hakmem_tiny_refill_p0.inc.h` - Update to use shared pool
|
||||
- `core/tiny_superslab_alloc.inc.h` - Replace per-class allocation with shared pool
|
||||
|
||||
**Key changes**:
|
||||
```c
|
||||
// OLD: superslab_refill(int class_idx)
|
||||
static SuperSlab* superslab_refill_old(int class_idx) {
|
||||
SuperSlabHead* head = &g_superslab_heads[class_idx];
|
||||
// ... allocate SuperSlab for class_idx only
|
||||
}
|
||||
|
||||
// NEW: superslab_refill(int class_idx) - use shared pool
|
||||
static SuperSlab* superslab_refill_new(int class_idx) {
|
||||
SuperSlab* ss = NULL;
|
||||
int slab_idx = -1;
|
||||
|
||||
// Try to acquire a free slab from shared pool
|
||||
if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) == 0) {
|
||||
// SUCCESS: Got a slab assigned to class_idx
|
||||
return ss;
|
||||
}
|
||||
|
||||
// FAILURE: All SuperSlabs full, need to allocate new one
|
||||
// (This should be RARE after pool grows to steady-state)
|
||||
return NULL;
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 12-4: Free Path Integration
|
||||
|
||||
**Files to modify**:
|
||||
- `core/tiny_free_fast.inc.h` - Update to handle dynamic class_idx
|
||||
- `core/tiny_superslab_free.inc.h` - Update to release slabs back to pool
|
||||
|
||||
**Key changes**:
|
||||
```c
|
||||
// OLD: Free assumes slab belongs to ss->size_class
|
||||
static inline void hak_tiny_free_superslab_old(void* ptr, SuperSlab* ss) {
|
||||
int class_idx = ss->size_class; // FIXED class
|
||||
// ... free logic
|
||||
}
|
||||
|
||||
// NEW: Free reads class_idx from slab metadata
|
||||
static inline void hak_tiny_free_superslab_new(void* ptr, SuperSlab* ss, int slab_idx) {
|
||||
int class_idx = ss->slabs[slab_idx].class_idx; // DYNAMIC class
|
||||
|
||||
// ... free logic
|
||||
|
||||
// If slab becomes empty, release back to pool
|
||||
if (ss->slabs[slab_idx].used == 0) {
|
||||
shared_pool_release_slab(ss, slab_idx);
|
||||
ss->slabs[slab_idx].class_idx = 255; // Mark as unassigned
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 12-5: Testing & Benchmarking
|
||||
|
||||
**Validation**:
|
||||
1. **Correctness**: Run bench_fixed_size_hakmem 100K iterations (all classes)
|
||||
2. **SuperSlab count**: Monitor g_shared_pool.total_count (expect 100-200)
|
||||
3. **Performance**: bench_random_mixed_hakmem (expect 70-90M ops/s)
|
||||
|
||||
**Expected results**:
|
||||
| Metric | Phase 11 (Before) | Phase 12 (After) | Improvement |
|
||||
|--------|-------------------|------------------|-------------|
|
||||
| SuperSlab count | 877 | 100-200 | -70-80% |
|
||||
| Memory usage | 877MB | 100-200MB | -70-80% |
|
||||
| Metadata overhead | ~1.8MB | ~0.2-0.4MB | -78-89% |
|
||||
| Performance | 9.38M ops/s | 70-90M ops/s | +650-860% |
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Risk Analysis
|
||||
|
||||
### Complexity Risks
|
||||
|
||||
1. **Concurrency**: Shared pool requires careful locking
|
||||
- **Mitigation**: Per-class hints reduce contention (lock-free fast path)
|
||||
|
||||
2. **Fragmentation**: Mixed classes in same SuperSlab may increase fragmentation
|
||||
- **Mitigation**: Smart slab assignment (prefer same-class SuperSlabs)
|
||||
|
||||
3. **Debugging**: Dynamic class_idx makes debugging harder
|
||||
- **Mitigation**: Add runtime validation (class_idx sanity checks)
|
||||
|
||||
### Performance Risks
|
||||
|
||||
1. **Lock contention**: Shared pool lock may become bottleneck
|
||||
- **Mitigation**: Per-class hints + fast path bypass lock 90%+ of time
|
||||
|
||||
2. **Cache misses**: Accessing distant SuperSlabs may reduce locality
|
||||
- **Mitigation**: LRU cache keeps hot SuperSlabs resident
|
||||
|
||||
---
|
||||
|
||||
## 📊 Success Metrics
|
||||
|
||||
### Primary Goals
|
||||
|
||||
1. **SuperSlab count**: 877 → 100-200 (-70-80%) ✅
|
||||
2. **Performance**: 9.38M → 70-90M ops/s (+650-860%) ✅
|
||||
3. **Memory usage**: 877MB → 100-200MB (-70-80%) ✅
|
||||
|
||||
### Stretch Goals
|
||||
|
||||
1. **System malloc parity**: 90M ops/s (100% of target) 🎯
|
||||
2. **Scalability**: Maintain performance with 4T+ threads
|
||||
3. **Fragmentation**: <10% internal fragmentation
|
||||
|
||||
---
|
||||
|
||||
## 🔄 Migration Strategy
|
||||
|
||||
### Phase 12-1: Metadata (Low Risk)
|
||||
- Add `class_idx` to TinySlabMeta (16B preserved)
|
||||
- Remove `size_class` from SuperSlab
|
||||
- Add backward-compatible shim
|
||||
|
||||
### Phase 12-2: Infrastructure (Medium Risk)
|
||||
- Implement shared pool (NEW code, isolated)
|
||||
- No changes to existing paths yet
|
||||
|
||||
### Phase 12-3: Integration (High Risk)
|
||||
- Update refill path to use shared pool
|
||||
- Update free path to handle dynamic class_idx
|
||||
- **Critical**: Extensive testing required
|
||||
|
||||
### Phase 12-4: Cleanup (Low Risk)
|
||||
- Remove per-class SuperSlabHead structures
|
||||
- Remove backward-compatible shims
|
||||
- Final optimization pass
|
||||
|
||||
---
|
||||
|
||||
## 📝 Next Steps
|
||||
|
||||
### Immediate (Phase 12-1)
|
||||
|
||||
1. ✅ Update `superslab_types.h` - Add `class_idx` to TinySlabMeta
|
||||
2. ✅ Update `superslab_types.h` - Remove `size_class` from SuperSlab
|
||||
3. Add backward-compatible shim `superslab_get_class()`
|
||||
4. Fix compilation errors (grep for `ss->size_class`)
|
||||
|
||||
### Next (Phase 12-2)
|
||||
|
||||
1. Implement `hakmem_shared_pool.h/c`
|
||||
2. Write unit tests for shared pool
|
||||
3. Integrate with LRU cache (Phase 9)
|
||||
|
||||
### Then (Phase 12-3+)
|
||||
|
||||
1. Update refill path
|
||||
2. Update free path
|
||||
3. Benchmark & validate
|
||||
4. Cleanup & optimize
|
||||
|
||||
---
|
||||
|
||||
**Status**: 🚧 Phase 12-1 (Metadata) - IN PROGRESS
|
||||
**Expected completion**: Phase 12-1 today, Phase 12-2 tomorrow, Phase 12-3 day after
|
||||
**Total estimated time**: 3-4 days for full implementation
|
||||
@ -18,7 +18,8 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
|
||||
if (actual_idx != slab_idx) {
|
||||
tiny_failfast_abort_ptr("free_local_box_idx", ss, slab_idx, ptr, "slab_idx_mismatch");
|
||||
} else {
|
||||
size_t blk = g_tiny_class_sizes[ss->size_class];
|
||||
uint8_t cls = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
|
||||
size_t blk = g_tiny_class_sizes[cls];
|
||||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||||
uintptr_t delta = (uintptr_t)base - (uintptr_t)slab_base;
|
||||
if (blk == 0 || (delta % blk) != 0) {
|
||||
@ -33,7 +34,8 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
|
||||
|
||||
// FREELIST CORRUPTION DEBUG: Validate pointer before writing
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[ss->size_class];
|
||||
uint8_t cls = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
|
||||
size_t blk = g_tiny_class_sizes[cls];
|
||||
uint8_t* base_ss = (uint8_t*)ss;
|
||||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||||
|
||||
@ -44,32 +46,34 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
|
||||
|
||||
// Check if prev is within this slab
|
||||
if (prev_addr < (uintptr_t)base_ss || prev_addr >= (uintptr_t)base_ss + (2*1024*1024)) {
|
||||
fprintf(stderr, "[FREE_CORRUPT] prev=%p outside SuperSlab ss=%p (cls=%u slab=%d)\n",
|
||||
prev, ss, ss->size_class, slab_idx);
|
||||
fprintf(stderr, "[FREE_CORRUPT] prev=%p outside SuperSlab ss=%p slab=%d\n",
|
||||
prev, ss, slab_idx);
|
||||
tiny_failfast_abort_ptr("free_local_prev_range", ss, slab_idx, ptr, "prev_outside_ss");
|
||||
}
|
||||
|
||||
// Check alignment of prev
|
||||
if ((prev_addr - slab_addr) % blk != 0) {
|
||||
fprintf(stderr, "[FREE_CORRUPT] prev=%p misaligned (cls=%u slab=%d blk=%zu offset=%zu)\n",
|
||||
prev, ss->size_class, slab_idx, blk, (size_t)(prev_addr - slab_addr));
|
||||
prev, cls, slab_idx, blk, (size_t)(prev_addr - slab_addr));
|
||||
fprintf(stderr, "[FREE_CORRUPT] Writing from ptr=%p, freelist was=%p\n", ptr, prev);
|
||||
tiny_failfast_abort_ptr("free_local_prev_misalign", ss, slab_idx, ptr, "prev_misaligned");
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "[FREE_VERIFY] cls=%u slab=%d ptr=%p prev=%p (offset_ptr=%zu offset_prev=%zu)\n",
|
||||
ss->size_class, slab_idx, ptr, prev,
|
||||
cls, slab_idx, ptr, prev,
|
||||
(size_t)((uintptr_t)base - (uintptr_t)slab_base),
|
||||
prev ? (size_t)((uintptr_t)prev - (uintptr_t)slab_base) : 0);
|
||||
}
|
||||
|
||||
tiny_next_write(ss->size_class, ptr, prev); // Phase E1-CORRECT: Box API
|
||||
// Use per-slab class for freelist linkage
|
||||
uint8_t cls = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
|
||||
tiny_next_write(cls, ptr, prev); // Phase E1-CORRECT: Box API with shared pool
|
||||
meta->freelist = ptr;
|
||||
|
||||
// FREELIST CORRUPTION DEBUG: Verify write succeeded
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
void* readback = tiny_next_read(ss->size_class, ptr); // Phase E1-CORRECT: Box API
|
||||
void* readback = tiny_next_read(cls, ptr); // Phase E1-CORRECT: Box API
|
||||
if (readback != prev) {
|
||||
fprintf(stderr, "[FREE_CORRUPT] Wrote prev=%p to ptr=%p but read back %p!\n",
|
||||
prev, ptr, readback);
|
||||
@ -78,7 +82,7 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
|
||||
}
|
||||
}
|
||||
|
||||
tiny_failfast_log("free_local_box", ss->size_class, ss, meta, ptr, prev);
|
||||
tiny_failfast_log("free_local_box", cls, ss, meta, ptr, prev);
|
||||
// BUGFIX: Memory barrier to ensure freelist visibility before used decrement
|
||||
// Without this, other threads can see new freelist but old used count (race)
|
||||
atomic_thread_fence(memory_order_release);
|
||||
@ -102,7 +106,8 @@ void tiny_free_local_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
|
||||
ss_active_dec_one(ss);
|
||||
|
||||
if (prev == NULL) {
|
||||
// First-free → advertise slab to adopters
|
||||
tiny_free_publish_first_free((int)ss->size_class, ss, slab_idx);
|
||||
// First-free → advertise slab to adopters using per-slab class
|
||||
uint8_t cls0 = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
|
||||
tiny_free_publish_first_free((int)cls0, ss, slab_idx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -15,7 +15,9 @@ int tiny_free_remote_box(SuperSlab* ss, int slab_idx, TinySlabMeta* meta, void*
|
||||
int transitioned = ss_remote_push(ss, slab_idx, ptr); // ss_active_dec_one() called inside
|
||||
// ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
|
||||
if (transitioned) {
|
||||
tiny_free_publish_remote_transition((int)ss->size_class, ss, slab_idx);
|
||||
// Phase 12: use per-slab class for publish metadata
|
||||
uint8_t cls = (meta && meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : 0;
|
||||
tiny_free_publish_remote_transition((int)cls, ss, slab_idx);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
||||
@ -111,41 +111,43 @@ static inline ptr_classification_t registry_lookup(void* ptr) {
|
||||
|
||||
// Query SuperSlab registry
|
||||
struct SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (!ss) {
|
||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
|
||||
// Not in Tiny registry
|
||||
return result;
|
||||
}
|
||||
|
||||
// Found SuperSlab - determine slab index
|
||||
// Found SuperSlab - determine slab index from ptr-1 (block base)
|
||||
result.ss = ss;
|
||||
result.class_idx = ss->size_class;
|
||||
|
||||
// Calculate slab index
|
||||
uintptr_t ptr_addr = (uintptr_t)ptr;
|
||||
uintptr_t ss_addr = (uintptr_t)ss;
|
||||
|
||||
if (ptr_addr < ss_addr) {
|
||||
// Pointer before SuperSlab base (invalid)
|
||||
if (ptr_addr <= ss_addr) {
|
||||
result.kind = PTR_KIND_UNKNOWN;
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t offset = ptr_addr - ss_addr;
|
||||
result.slab_idx = (int)(offset / SLAB_SIZE);
|
||||
|
||||
// Validate slab index (ss_slabs_capacity defined in superslab_inline.h)
|
||||
if (result.slab_idx < 0 || result.slab_idx >= ss_slabs_capacity(ss)) {
|
||||
// Out of range
|
||||
// Use block base for slab index to be consistent with free paths
|
||||
uintptr_t base_addr = ptr_addr - 1;
|
||||
size_t offset = base_addr - ss_addr;
|
||||
int slab_idx = (int)(offset / SLAB_SIZE);
|
||||
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
|
||||
result.kind = PTR_KIND_UNKNOWN;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Valid Tiny allocation
|
||||
// Only class 7 (1KB) is headerless. Other classes use header-based free path.
|
||||
if (ss->size_class == 7) {
|
||||
result.slab_idx = slab_idx;
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
int cls = (meta->class_idx < TINY_NUM_CLASSES) ? (int)meta->class_idx : -1;
|
||||
result.class_idx = cls;
|
||||
|
||||
if (cls == 7) {
|
||||
// 1KB headerless tiny
|
||||
result.kind = PTR_KIND_TINY_HEADERLESS;
|
||||
} else {
|
||||
} else if (cls >= 0) {
|
||||
// Other tiny classes with 1-byte header
|
||||
result.kind = PTR_KIND_TINY_HEADER;
|
||||
} else {
|
||||
result.kind = PTR_KIND_UNKNOWN;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
260
core/hakmem_shared_pool.c
Normal file
260
core/hakmem_shared_pool.c
Normal file
@ -0,0 +1,260 @@
|
||||
#include "hakmem_shared_pool.h"
|
||||
#include "hakmem_tiny_superslab_constants.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Phase 12-2: SharedSuperSlabPool skeleton implementation
|
||||
// Goal:
|
||||
// - Centralize SuperSlab allocation/registration
|
||||
// - Provide acquire_slab/release_slab APIs for later refill/free integration
|
||||
// - Keep logic simple & conservative; correctness and observability first.
|
||||
//
|
||||
// Notes:
|
||||
// - Concurrency: protected by g_shared_pool.alloc_lock for now.
|
||||
// - class_hints is best-effort: read lock-free, written under lock.
|
||||
// - LRU hooks left as no-op placeholders.
|
||||
|
||||
SharedSuperSlabPool g_shared_pool = {
|
||||
.slabs = NULL,
|
||||
.capacity = 0,
|
||||
.total_count = 0,
|
||||
.active_count = 0,
|
||||
.alloc_lock = PTHREAD_MUTEX_INITIALIZER,
|
||||
.class_hints = { NULL },
|
||||
.lru_head = NULL,
|
||||
.lru_tail = NULL,
|
||||
.lru_count = 0
|
||||
};
|
||||
|
||||
static void
|
||||
shared_pool_ensure_capacity_unlocked(uint32_t min_capacity)
|
||||
{
|
||||
if (g_shared_pool.capacity >= min_capacity) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t new_cap = g_shared_pool.capacity ? g_shared_pool.capacity : 16;
|
||||
while (new_cap < min_capacity) {
|
||||
new_cap *= 2;
|
||||
}
|
||||
|
||||
SuperSlab** new_slabs = (SuperSlab**)realloc(g_shared_pool.slabs,
|
||||
new_cap * sizeof(SuperSlab*));
|
||||
if (!new_slabs) {
|
||||
// Allocation failure: keep old state; caller must handle NULL later.
|
||||
return;
|
||||
}
|
||||
|
||||
// Zero new entries to keep scanning logic simple.
|
||||
memset(new_slabs + g_shared_pool.capacity, 0,
|
||||
(new_cap - g_shared_pool.capacity) * sizeof(SuperSlab*));
|
||||
|
||||
g_shared_pool.slabs = new_slabs;
|
||||
g_shared_pool.capacity = new_cap;
|
||||
}
|
||||
|
||||
void
|
||||
shared_pool_init(void)
|
||||
{
|
||||
// Idempotent init; safe to call from multiple early paths.
|
||||
// pthread_mutex_t with static initializer is already valid.
|
||||
pthread_mutex_lock(&g_shared_pool.alloc_lock);
|
||||
if (g_shared_pool.capacity == 0 && g_shared_pool.slabs == NULL) {
|
||||
shared_pool_ensure_capacity_unlocked(16);
|
||||
}
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
}
|
||||
|
||||
// Internal: allocate and register a new SuperSlab.
|
||||
// Caller must hold alloc_lock.
|
||||
static SuperSlab*
|
||||
shared_pool_allocate_superslab_unlocked(void)
|
||||
{
|
||||
// Allocate SuperSlab and backing memory region.
|
||||
// NOTE: Existing code likely has a helper; we keep this minimal for now.
|
||||
SuperSlab* ss = (SuperSlab*)aligned_alloc(64, sizeof(SuperSlab));
|
||||
if (!ss) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(ss, 0, sizeof(SuperSlab));
|
||||
ss->magic = SUPERSLAB_MAGIC;
|
||||
ss->lg_size = SUPERSLAB_LG_DEFAULT;
|
||||
ss->active_slabs = 0;
|
||||
ss->slab_bitmap = 0;
|
||||
|
||||
// Initialize all per-slab metadata to UNASSIGNED for Phase 12 semantics.
|
||||
for (int i = 0; i < SLABS_PER_SUPERSLAB_MAX; i++) {
|
||||
ss->slabs[i].class_idx = 255; // UNASSIGNED
|
||||
ss->slabs[i].owner_tid_low = 0;
|
||||
}
|
||||
|
||||
// Register into pool array.
|
||||
if (g_shared_pool.total_count >= g_shared_pool.capacity) {
|
||||
shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
|
||||
if (g_shared_pool.total_count >= g_shared_pool.capacity) {
|
||||
free(ss);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
g_shared_pool.slabs[g_shared_pool.total_count] = ss;
|
||||
g_shared_pool.total_count++;
|
||||
// Not counted as active until we assign at least one slab.
|
||||
return ss;
|
||||
}
|
||||
|
||||
SuperSlab*
|
||||
shared_pool_acquire_superslab(void)
|
||||
{
|
||||
shared_pool_init();
|
||||
|
||||
pthread_mutex_lock(&g_shared_pool.alloc_lock);
|
||||
|
||||
// For now, always allocate a fresh SuperSlab and register it.
|
||||
// More advanced reuse/GC comes later.
|
||||
SuperSlab* ss = shared_pool_allocate_superslab_unlocked();
|
||||
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
return ss;
|
||||
}
|
||||
|
||||
int
|
||||
shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out)
|
||||
{
|
||||
if (!ss_out || !slab_idx_out) {
|
||||
return -1;
|
||||
}
|
||||
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
shared_pool_init();
|
||||
|
||||
// Fast-path hint: read without lock (best-effort).
|
||||
SuperSlab* hint = g_shared_pool.class_hints[class_idx];
|
||||
if (hint) {
|
||||
// Scan for a free, unassigned slab in this SuperSlab.
|
||||
uint32_t bitmap = hint->slab_bitmap;
|
||||
for (int i = 0; i < SLABS_PER_SUPERSLAB_MAX; i++) {
|
||||
uint32_t bit = (1u << i);
|
||||
if ((bitmap & bit) == 0 && hint->slabs[i].class_idx == 255) {
|
||||
// Tentative claim: upgrade under lock to avoid races.
|
||||
pthread_mutex_lock(&g_shared_pool.alloc_lock);
|
||||
// Re-check under lock.
|
||||
bitmap = hint->slab_bitmap;
|
||||
if ((bitmap & bit) == 0 && hint->slabs[i].class_idx == 255) {
|
||||
hint->slab_bitmap |= bit;
|
||||
hint->slabs[i].class_idx = (uint8_t)class_idx;
|
||||
hint->active_slabs++;
|
||||
if (hint->active_slabs == 1) {
|
||||
g_shared_pool.active_count++;
|
||||
}
|
||||
*ss_out = hint;
|
||||
*slab_idx_out = i;
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
return 0;
|
||||
}
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
break; // fall through to slow path
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Slow path: lock and scan all registered SuperSlabs.
|
||||
pthread_mutex_lock(&g_shared_pool.alloc_lock);
|
||||
|
||||
for (uint32_t idx = 0; idx < g_shared_pool.total_count; idx++) {
|
||||
SuperSlab* ss = g_shared_pool.slabs[idx];
|
||||
if (!ss) {
|
||||
continue;
|
||||
}
|
||||
uint32_t bitmap = ss->slab_bitmap;
|
||||
for (int i = 0; i < SLABS_PER_SUPERSLAB_MAX; i++) {
|
||||
uint32_t bit = (1u << i);
|
||||
if ((bitmap & bit) == 0 && ss->slabs[i].class_idx == 255) {
|
||||
// Assign this slab to class_idx.
|
||||
ss->slab_bitmap |= bit;
|
||||
ss->slabs[i].class_idx = (uint8_t)class_idx;
|
||||
ss->active_slabs++;
|
||||
if (ss->active_slabs == 1) {
|
||||
g_shared_pool.active_count++;
|
||||
}
|
||||
// Update hint.
|
||||
g_shared_pool.class_hints[class_idx] = ss;
|
||||
*ss_out = ss;
|
||||
*slab_idx_out = i;
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No existing space: allocate a new SuperSlab and take its first slab.
|
||||
SuperSlab* ss = shared_pool_allocate_superslab_unlocked();
|
||||
if (!ss) {
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int slab_idx = 0;
|
||||
ss->slab_bitmap |= (1u << slab_idx);
|
||||
ss->slabs[slab_idx].class_idx = (uint8_t)class_idx;
|
||||
ss->active_slabs = 1;
|
||||
g_shared_pool.active_count++;
|
||||
|
||||
g_shared_pool.class_hints[class_idx] = ss;
|
||||
|
||||
*ss_out = ss;
|
||||
*slab_idx_out = slab_idx;
|
||||
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
shared_pool_release_slab(SuperSlab* ss, int slab_idx)
|
||||
{
|
||||
if (!ss) {
|
||||
return;
|
||||
}
|
||||
if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) {
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_shared_pool.alloc_lock);
|
||||
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
if (meta->used != 0) {
|
||||
// Not actually empty; nothing to do.
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t bit = (1u << slab_idx);
|
||||
if (ss->slab_bitmap & bit) {
|
||||
ss->slab_bitmap &= ~bit;
|
||||
uint8_t old_class = meta->class_idx;
|
||||
meta->class_idx = 255; // UNASSIGNED
|
||||
|
||||
if (ss->active_slabs > 0) {
|
||||
ss->active_slabs--;
|
||||
if (ss->active_slabs == 0 && g_shared_pool.active_count > 0) {
|
||||
g_shared_pool.active_count--;
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidate class hint if it pointed here and this superslab has no free slab
|
||||
// for that class anymore; for now we do a simple best-effort clear.
|
||||
if (old_class < TINY_NUM_CLASSES_SS &&
|
||||
g_shared_pool.class_hints[old_class] == ss) {
|
||||
// We could rescan ss for another matching slab; to keep it cheap, just clear.
|
||||
g_shared_pool.class_hints[old_class] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO Phase 12-4+: if ss->active_slabs == 0, consider GC / unmap.
|
||||
|
||||
pthread_mutex_unlock(&g_shared_pool.alloc_lock);
|
||||
}
|
||||
57
core/hakmem_shared_pool.h
Normal file
57
core/hakmem_shared_pool.h
Normal file
@ -0,0 +1,57 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include "superslab/superslab_types.h"
|
||||
|
||||
// Shared SuperSlab Pool (Phase 12-2 skeleton)
|
||||
// Multiple tiny size classes share a global set of SuperSlab instances.
|
||||
// This header exposes the minimal API used by refill/free hot paths in Phase 12.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct SharedSuperSlabPool {
|
||||
SuperSlab** slabs; // Dynamic array of SuperSlab*
|
||||
uint32_t capacity; // Allocated entries in slabs[]
|
||||
uint32_t total_count; // Total SuperSlabs ever allocated (<= capacity)
|
||||
uint32_t active_count; // SuperSlabs that have >0 active slabs
|
||||
|
||||
pthread_mutex_t alloc_lock; // Protects pool metadata and grow/scan operations
|
||||
|
||||
// Per-class hints: last known SuperSlab with a free slab for that class.
|
||||
// Read lock-free (best-effort), updated under alloc_lock.
|
||||
SuperSlab* class_hints[TINY_NUM_CLASSES_SS];
|
||||
|
||||
// LRU cache integration hooks (Phase 9/12, optional for now)
|
||||
SuperSlab* lru_head;
|
||||
SuperSlab* lru_tail;
|
||||
uint32_t lru_count;
|
||||
} SharedSuperSlabPool;
|
||||
|
||||
// Global singleton
|
||||
extern SharedSuperSlabPool g_shared_pool;
|
||||
|
||||
// Initialize shared pool (idempotent, thread-safe wrt multiple callers on startup paths)
|
||||
void shared_pool_init(void);
|
||||
|
||||
// Get/allocate a SuperSlab registered in the pool.
|
||||
// Returns non-NULL on success, NULL on failure.
|
||||
SuperSlab* shared_pool_acquire_superslab(void);
|
||||
|
||||
// Acquire a slab for class_idx from shared pool.
|
||||
// On success:
|
||||
// *ss_out = SuperSlab containing slab
|
||||
// *slab_idx_out = slab index [0, SLABS_PER_SUPERSLAB_MAX)
|
||||
// Returns 0 on success, non-zero on failure.
|
||||
int shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out);
|
||||
|
||||
// Release an empty slab back to pool (mark as unassigned).
|
||||
// Caller must ensure TinySlabMeta.used == 0.
|
||||
void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -73,8 +73,8 @@ int hak_super_register(uintptr_t base, SuperSlab* ss) {
|
||||
|
||||
hash_registered = 1;
|
||||
if (dbg_once == 1) {
|
||||
fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d class=%d magic=%llx\n",
|
||||
(void*)base, lg, (h + i) & SUPER_REG_MASK, ss->size_class,
|
||||
fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d magic=%llx\n",
|
||||
(void*)base, lg, (h + i) & SUPER_REG_MASK,
|
||||
(unsigned long long)ss->magic);
|
||||
}
|
||||
break;
|
||||
@ -94,36 +94,8 @@ int hak_super_register(uintptr_t base, SuperSlab* ss) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Step 2: Register in per-class registry (Phase 6: Registry Optimization)
|
||||
// Purpose: Enable O(class_size) refill scan instead of O(262K)
|
||||
int class_idx = ss->size_class;
|
||||
if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
|
||||
int size = g_super_reg_class_size[class_idx];
|
||||
if (size < SUPER_REG_PER_CLASS) {
|
||||
// Check for duplicate registration
|
||||
int already_in_class = 0;
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (g_super_reg_by_class[class_idx][i] == ss) {
|
||||
already_in_class = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!already_in_class) {
|
||||
// Add to per-class registry
|
||||
g_super_reg_by_class[class_idx][size] = ss;
|
||||
g_super_reg_class_size[class_idx]++;
|
||||
}
|
||||
} else {
|
||||
// Per-class registry full (rare). Suppress unless verbose
|
||||
const char* q = getenv("HAKMEM_QUIET");
|
||||
if (!(q && *q && *q != '0')) {
|
||||
fprintf(stderr, "HAKMEM: Per-class registry full for class %d! "
|
||||
"Increase SUPER_REG_PER_CLASS\n", class_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 12: per-class registry not keyed by ss->size_class anymore.
|
||||
// Keep existing global hash registration only.
|
||||
pthread_mutex_unlock(&g_super_reg_lock);
|
||||
return 1;
|
||||
}
|
||||
@ -182,27 +154,7 @@ void hak_super_unregister(uintptr_t base) {
|
||||
hash_removed:
|
||||
// Step 2: Remove from per-class registry (Phase 6: Registry Optimization)
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
int class_idx = ss->size_class;
|
||||
if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
|
||||
int size = g_super_reg_class_size[class_idx];
|
||||
|
||||
// Linear scan to find and remove SuperSlab from per-class array
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (g_super_reg_by_class[class_idx][i] == ss) {
|
||||
// Found: Remove by shifting last element to this position
|
||||
g_super_reg_class_size[class_idx]--;
|
||||
int new_size = g_super_reg_class_size[class_idx];
|
||||
|
||||
// Swap with last element (O(1) removal, order doesn't matter)
|
||||
if (i != new_size) {
|
||||
g_super_reg_by_class[class_idx][i] =
|
||||
g_super_reg_by_class[class_idx][new_size];
|
||||
}
|
||||
g_super_reg_by_class[class_idx][new_size] = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Phase 12: per-class registry no longer keyed; no per-class removal required.
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&g_super_reg_lock);
|
||||
@ -330,8 +282,8 @@ static int ss_lru_evict_one(void) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static int evict_log_count = 0;
|
||||
if (evict_log_count < 10) {
|
||||
fprintf(stderr, "[SS_LRU_EVICT] ss=%p class=%d size=%zu (cache_count=%u)\n",
|
||||
victim, victim->size_class, ss_size, g_ss_lru_cache.total_count);
|
||||
fprintf(stderr, "[SS_LRU_EVICT] ss=%p size=%zu (cache_count=%u)\n",
|
||||
victim, ss_size, g_ss_lru_cache.total_count);
|
||||
evict_log_count++;
|
||||
}
|
||||
#endif
|
||||
@ -395,7 +347,8 @@ SuperSlab* hak_ss_lru_pop(uint8_t size_class) {
|
||||
// Find a matching SuperSlab in cache (same size_class)
|
||||
SuperSlab* curr = g_ss_lru_cache.lru_head;
|
||||
while (curr) {
|
||||
if (curr->size_class == size_class) {
|
||||
// Phase 12: LRU entries are not keyed by ss->size_class; treat any as reusable for now.
|
||||
if (1) {
|
||||
// Found match - remove from cache
|
||||
ss_lru_remove(curr);
|
||||
g_ss_lru_cache.total_count--;
|
||||
@ -407,8 +360,8 @@ SuperSlab* hak_ss_lru_pop(uint8_t size_class) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static int pop_log_count = 0;
|
||||
if (pop_log_count < 10) {
|
||||
fprintf(stderr, "[SS_LRU_POP] Reusing ss=%p class=%d size=%zu (cache_count=%u)\n",
|
||||
curr, size_class, ss_size, g_ss_lru_cache.total_count);
|
||||
fprintf(stderr, "[SS_LRU_POP] Reusing ss=%p size=%zu (cache_count=%u)\n",
|
||||
curr, ss_size, g_ss_lru_cache.total_count);
|
||||
pop_log_count++;
|
||||
}
|
||||
#endif
|
||||
@ -462,8 +415,8 @@ int hak_ss_lru_push(SuperSlab* ss) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static int push_log_count = 0;
|
||||
if (push_log_count < 10) {
|
||||
fprintf(stderr, "[SS_LRU_PUSH] Cached ss=%p class=%d size=%zu (cache_count=%u)\n",
|
||||
ss, ss->size_class, ss_size, g_ss_lru_cache.total_count);
|
||||
fprintf(stderr, "[SS_LRU_PUSH] Cached ss=%p size=%zu (cache_count=%u)\n",
|
||||
ss, ss_size, g_ss_lru_cache.total_count);
|
||||
push_log_count++;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -218,14 +218,12 @@ SuperSlab* superslab_refill(int class_idx);
|
||||
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
|
||||
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
|
||||
// Forward decl: used by tiny_spec_pop_path before its definition
|
||||
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
|
||||
// Note: Remove 'inline' to provide linkable definition for LTO
|
||||
// P0 Fix: When P0 is enabled, use sll_refill_batch_from_ss instead
|
||||
#if HAKMEM_TINY_P0_BATCH_REFILL
|
||||
// P0 enabled: use batch refill
|
||||
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
|
||||
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
|
||||
#else
|
||||
// P0 disabled: use original refill
|
||||
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
|
||||
// Only a single implementation exists there; declare here for callers.
|
||||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||||
int sll_refill_small_from_ss(int class_idx, int max_take);
|
||||
#else
|
||||
@ -452,9 +450,10 @@ static inline void tiny_debug_track_alloc_ret(int cls, void* ptr) {
|
||||
tiny_failfast_abort_ptr("alloc_ret_slabidx", ss, slab_idx, ptr, "slab_idx_mismatch");
|
||||
} else {
|
||||
// Fail-Fast: class vs SuperSlab size_class must be consistent.
|
||||
if (ss->size_class != cls) {
|
||||
tiny_failfast_abort_ptr("alloc_ret_cls_mismatch", ss, slab_idx, ptr, "class_mismatch");
|
||||
}
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
if (meta->class_idx != (uint8_t)cls) {
|
||||
tiny_failfast_abort_ptr("alloc_ret_cls_mismatch", ss, slab_idx, ptr, "class_mismatch");
|
||||
}
|
||||
size_t blk = g_tiny_class_sizes[cls];
|
||||
uintptr_t base = (uintptr_t)tiny_slab_base_for(ss, slab_idx);
|
||||
uintptr_t delta = (uintptr_t)base_ptr - base;
|
||||
@ -838,11 +837,11 @@ void ss_partial_publish(int class_idx, SuperSlab* ss) {
|
||||
// The publishing thread must stop using this SS after publishing.
|
||||
int cap_pub = ss_slabs_capacity(ss);
|
||||
for (int s = 0; s < cap_pub; s++) {
|
||||
uint32_t prev = __atomic_exchange_n(&ss->slabs[s].owner_tid, 0u, __ATOMIC_RELEASE);
|
||||
uint8_t prev = __atomic_exchange_n(&ss->slabs[s].owner_tid_low, 0u, __ATOMIC_RELEASE);
|
||||
if (__builtin_expect(g_debug_remote_guard && prev != 0u, 0)) {
|
||||
uintptr_t aux = ((uintptr_t)s << 32) | (uintptr_t)prev;
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_OWNER_RELEASE,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)ss->slabs[s].class_idx,
|
||||
&ss->slabs[s],
|
||||
aux);
|
||||
}
|
||||
@ -939,8 +938,8 @@ SuperSlab* ss_partial_adopt(int class_idx) {
|
||||
}
|
||||
|
||||
static inline void tiny_tls_bind_slab(TinyTLSSlab* tls, SuperSlab* ss, int slab_idx) {
|
||||
// Canonical binding:
|
||||
// - ss->size_class defines block size for this SuperSlab
|
||||
// Canonical binding under Phase 12:
|
||||
// - Per-slab TinySlabMeta.class_idx defines class for this slab
|
||||
// - slab_idx is the owning slab index within ss
|
||||
// - slab_base is ALWAYS derived from tiny_slab_base_for(ss, slab_idx)
|
||||
tls->ss = ss;
|
||||
@ -1447,7 +1446,8 @@ static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
|
||||
if (!meta) return NULL;
|
||||
// Try linear (bump) allocation first when freelist is empty
|
||||
if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
|
||||
size_t block_size = g_tiny_class_sizes[tls->ss->size_class];
|
||||
// Use per-slab class_idx to get stride
|
||||
size_t block_size = tiny_stride_for_class(meta->class_idx);
|
||||
void* block = tls->slab_base + ((size_t)meta->used * block_size);
|
||||
meta->used++;
|
||||
// Track active blocks in SuperSlab for conservative reclamation
|
||||
|
||||
@ -33,15 +33,13 @@ core/hakmem_tiny.o: core/hakmem_tiny.c core/hakmem_tiny.h \
|
||||
core/box/../ptr_track.h core/hakmem_tiny_hotmag.inc.h \
|
||||
core/hakmem_tiny_hot_pop.inc.h core/hakmem_tiny_fastcache.inc.h \
|
||||
core/hakmem_tiny_refill.inc.h core/tiny_box_geometry.h \
|
||||
core/hakmem_tiny_refill_p0.inc.h core/tiny_refill_opt.h \
|
||||
core/tiny_region_id.h core/ptr_track.h core/tiny_fc_api.h \
|
||||
core/box/integrity_box.h core/hakmem_tiny_ultra_front.inc.h \
|
||||
core/hakmem_tiny_intel.inc core/hakmem_tiny_background.inc \
|
||||
core/hakmem_tiny_bg_bin.inc.h core/hakmem_tiny_tls_ops.h \
|
||||
core/hakmem_tiny_remote.inc core/hakmem_tiny_init.inc \
|
||||
core/box/prewarm_box.h core/hakmem_tiny_bump.inc.h \
|
||||
core/hakmem_tiny_smallmag.inc.h core/tiny_atomic.h \
|
||||
core/tiny_alloc_fast.inc.h core/tiny_alloc_fast_sfc.inc.h \
|
||||
core/hakmem_tiny_ultra_front.inc.h core/hakmem_tiny_intel.inc \
|
||||
core/hakmem_tiny_background.inc core/hakmem_tiny_bg_bin.inc.h \
|
||||
core/hakmem_tiny_tls_ops.h core/hakmem_tiny_remote.inc \
|
||||
core/hakmem_tiny_init.inc core/box/prewarm_box.h \
|
||||
core/hakmem_tiny_bump.inc.h core/hakmem_tiny_smallmag.inc.h \
|
||||
core/tiny_atomic.h core/tiny_alloc_fast.inc.h \
|
||||
core/tiny_alloc_fast_sfc.inc.h core/tiny_region_id.h \
|
||||
core/tiny_alloc_fast_inline.h core/tiny_free_fast.inc.h \
|
||||
core/hakmem_tiny_alloc.inc core/hakmem_tiny_slow.inc \
|
||||
core/hakmem_tiny_free.inc core/box/free_publish_box.h core/mid_tcache.h \
|
||||
@ -50,7 +48,7 @@ core/hakmem_tiny.o: core/hakmem_tiny.c core/hakmem_tiny.h \
|
||||
core/box/../superslab/superslab_types.h core/box/../tiny_tls.h \
|
||||
core/tiny_superslab_free.inc.h core/box/free_remote_box.h \
|
||||
core/box/free_local_box.h core/hakmem_tiny_lifecycle.inc \
|
||||
core/hakmem_tiny_slab_mgmt.inc
|
||||
core/hakmem_tiny_slab_mgmt.inc core/tiny_fc_api.h
|
||||
core/hakmem_tiny.h:
|
||||
core/hakmem_build_flags.h:
|
||||
core/hakmem_trace.h:
|
||||
@ -127,12 +125,6 @@ core/hakmem_tiny_hot_pop.inc.h:
|
||||
core/hakmem_tiny_fastcache.inc.h:
|
||||
core/hakmem_tiny_refill.inc.h:
|
||||
core/tiny_box_geometry.h:
|
||||
core/hakmem_tiny_refill_p0.inc.h:
|
||||
core/tiny_refill_opt.h:
|
||||
core/tiny_region_id.h:
|
||||
core/ptr_track.h:
|
||||
core/tiny_fc_api.h:
|
||||
core/box/integrity_box.h:
|
||||
core/hakmem_tiny_ultra_front.inc.h:
|
||||
core/hakmem_tiny_intel.inc:
|
||||
core/hakmem_tiny_background.inc:
|
||||
@ -146,6 +138,7 @@ core/hakmem_tiny_smallmag.inc.h:
|
||||
core/tiny_atomic.h:
|
||||
core/tiny_alloc_fast.inc.h:
|
||||
core/tiny_alloc_fast_sfc.inc.h:
|
||||
core/tiny_region_id.h:
|
||||
core/tiny_alloc_fast_inline.h:
|
||||
core/tiny_free_fast.inc.h:
|
||||
core/hakmem_tiny_alloc.inc:
|
||||
@ -163,3 +156,4 @@ core/box/free_remote_box.h:
|
||||
core/box/free_local_box.h:
|
||||
core/hakmem_tiny_lifecycle.inc:
|
||||
core/hakmem_tiny_slab_mgmt.inc:
|
||||
core/tiny_fc_api.h:
|
||||
|
||||
@ -67,24 +67,28 @@ void bg_spill_drain_class(int class_idx, pthread_mutex_t* lock) {
|
||||
void* node = (void*)chain;
|
||||
while (node) {
|
||||
SuperSlab* owner_ss = hak_super_lookup(node);
|
||||
int node_class_idx = owner_ss ? owner_ss->size_class : 0;
|
||||
void* next = tiny_next_read(class_idx, node);
|
||||
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
|
||||
int slab_idx = slab_index_for(owner_ss, node);
|
||||
TinySlabMeta* meta = &owner_ss->slabs[slab_idx];
|
||||
if (!tiny_remote_guard_allow_local_push(owner_ss, slab_idx, meta, node, "bg_spill", self_tid)) {
|
||||
(void)ss_remote_push(owner_ss, slab_idx, node);
|
||||
if (meta->used > 0) meta->used--;
|
||||
node = next;
|
||||
continue;
|
||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(owner_ss)) {
|
||||
TinySlabMeta* meta = &owner_ss->slabs[slab_idx];
|
||||
uint8_t node_class_idx = (meta->class_idx < TINY_NUM_CLASSES)
|
||||
? meta->class_idx
|
||||
: (uint8_t)class_idx;
|
||||
if (!tiny_remote_guard_allow_local_push(owner_ss, slab_idx, meta, node, "bg_spill", self_tid)) {
|
||||
(void)ss_remote_push(owner_ss, slab_idx, node);
|
||||
if (meta->used > 0) meta->used--;
|
||||
node = next;
|
||||
continue;
|
||||
}
|
||||
void* prev = meta->freelist;
|
||||
// Phase 12: use per-slab class for next pointer
|
||||
tiny_next_write(node_class_idx, node, prev);
|
||||
meta->freelist = node;
|
||||
tiny_failfast_log("bg_spill", node_class_idx, owner_ss, meta, node, prev);
|
||||
meta->used--;
|
||||
// Active was decremented at free time
|
||||
}
|
||||
void* prev = meta->freelist;
|
||||
// Phase E1-CORRECT: ALL classes have headers, use Box API
|
||||
tiny_next_write(class_idx, node, prev);
|
||||
meta->freelist = node;
|
||||
tiny_failfast_log("bg_spill", owner_ss->size_class, owner_ss, meta, node, prev);
|
||||
meta->used--;
|
||||
// Active was decremented at free time
|
||||
}
|
||||
node = next;
|
||||
}
|
||||
|
||||
@ -111,11 +111,20 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
// SuperSlab path: Get class_idx from SuperSlab
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
|
||||
int class_idx = ss->size_class;
|
||||
// Derive class_idx from per-slab metadata instead of ss->size_class
|
||||
int class_idx = -1;
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int slab_idx = slab_index_for(ss, base);
|
||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
|
||||
if (meta_probe->class_idx < TINY_NUM_CLASSES) {
|
||||
class_idx = (int)meta_probe->class_idx;
|
||||
}
|
||||
}
|
||||
size_t ss_size = (size_t)1ULL << ss->lg_size;
|
||||
uintptr_t ss_base = (uintptr_t)ss;
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFFu, ss, (uintptr_t)ss->size_class);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFFu, ss, (uintptr_t)class_idx);
|
||||
return;
|
||||
}
|
||||
// Optional: cross-lookup TinySlab owner and detect class mismatch early
|
||||
@ -135,8 +144,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
|
||||
// Detect cross-thread: cross-thread free MUST go via superslab path
|
||||
// ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int slab_idx = slab_index_for(ss, base);
|
||||
base = (void*)((uint8_t*)ptr - 1);
|
||||
slab_idx = slab_index_for(ss, base);
|
||||
int ss_cap = ss_slabs_capacity(ss);
|
||||
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
|
||||
@ -162,7 +171,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
}
|
||||
}
|
||||
uint32_t self_tid = tiny_self_u32();
|
||||
if (__builtin_expect(meta->owner_tid != self_tid, 0)) {
|
||||
uint8_t self_tid_low = (uint8_t)self_tid;
|
||||
if (__builtin_expect(meta->owner_tid_low != self_tid_low || meta->owner_tid_low == 0, 0)) {
|
||||
// route directly to superslab (remote queue / freelist)
|
||||
uintptr_t ptr_val = (uintptr_t)ptr;
|
||||
uintptr_t ss_base = (uintptr_t)ss;
|
||||
@ -201,7 +211,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
meta->used--;
|
||||
ss_active_dec_one(ss);
|
||||
if (prev == NULL) {
|
||||
ss_partial_publish((int)ss->size_class, ss);
|
||||
// Publish using the slab's class (per-slab class_idx)
|
||||
ss_partial_publish(class_idx, ss);
|
||||
}
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
|
||||
HAK_STAT_FREE(class_idx);
|
||||
@ -290,9 +301,18 @@ void hak_tiny_free(void* ptr) {
|
||||
{
|
||||
int class_idx = -1;
|
||||
if (g_use_superslab) {
|
||||
// FIXED: Use hak_super_lookup() instead of hak_super_lookup() to avoid false positives
|
||||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) class_idx = ss->size_class;
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int sidx = slab_index_for(ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||
class_idx = (int)m->class_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (class_idx < 0) {
|
||||
TinySlab* slab = hak_tiny_owner_slab(ptr);
|
||||
@ -340,9 +360,18 @@ void hak_tiny_free(void* ptr) {
|
||||
if (g_tiny_ultra) {
|
||||
int class_idx = -1;
|
||||
if (g_use_superslab) {
|
||||
// FIXED: Use hak_super_lookup() instead of hak_super_lookup() to avoid false positives
|
||||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) class_idx = ss->size_class;
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int sidx = slab_index_for(ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||
class_idx = (int)m->class_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (class_idx < 0) {
|
||||
TinySlab* slab = hak_tiny_owner_slab(ptr);
|
||||
@ -411,13 +440,16 @@ void hak_tiny_free(void* ptr) {
|
||||
if (g_use_superslab) {
|
||||
fast_ss = hak_super_lookup(ptr);
|
||||
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
|
||||
fast_class_idx = fast_ss->size_class;
|
||||
// BUGFIX: Validate size_class before using as array index (prevents OOB = 85% of FREE_TO_SS SEGV)
|
||||
if (__builtin_expect(fast_class_idx < 0 || fast_class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF0, ptr, (uintptr_t)fast_class_idx);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int sidx = slab_index_for(fast_ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
|
||||
TinySlabMeta* m = &fast_ss->slabs[sidx];
|
||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||
fast_class_idx = (int)m->class_idx;
|
||||
}
|
||||
}
|
||||
if (fast_class_idx < 0) {
|
||||
fast_ss = NULL;
|
||||
fast_class_idx = -1;
|
||||
}
|
||||
} else {
|
||||
fast_ss = NULL;
|
||||
@ -431,7 +463,16 @@ void hak_tiny_free(void* ptr) {
|
||||
if (__builtin_expect(g_tiny_safe_free && fast_class_idx >= 0, 0)) {
|
||||
int ss_cls = -1, ts_cls = -1;
|
||||
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
|
||||
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) ss_cls = chk_ss->size_class;
|
||||
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int sidx = slab_index_for(chk_ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
|
||||
TinySlabMeta* m = &chk_ss->slabs[sidx];
|
||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||
ss_cls = (int)m->class_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
TinySlab* chk_slab = fast_slab ? fast_slab : hak_tiny_owner_slab(ptr);
|
||||
if (chk_slab) ts_cls = chk_slab->class_idx;
|
||||
if (ss_cls >= 0 && ts_cls >= 0 && ss_cls != ts_cls) {
|
||||
@ -462,15 +503,22 @@ void hak_tiny_free(void* ptr) {
|
||||
}
|
||||
}
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
// BUGFIX: Validate size_class before using as array index (prevents OOB)
|
||||
if (__builtin_expect(ss->size_class < 0 || ss->size_class >= TINY_NUM_CLASSES, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF2, ptr, (uintptr_t)ss->size_class);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
// Derive class from per-slab meta
|
||||
int cls = -1;
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int sidx = slab_index_for(ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||
cls = (int)m->class_idx;
|
||||
}
|
||||
}
|
||||
if (cls < 0) {
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
|
||||
return;
|
||||
}
|
||||
// Direct SuperSlab free (avoid second lookup TOCTOU)
|
||||
hak_tiny_free_superslab(ptr, ss);
|
||||
HAK_STAT_FREE(ss->size_class);
|
||||
HAK_STAT_FREE(cls);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -12,6 +12,18 @@
|
||||
// Cold/maintenance path - not performance critical.
|
||||
#include "tiny_tls_guard.h"
|
||||
|
||||
// Phase 12: Helper to derive a representative class index for a SuperSlab
|
||||
// from per-slab metadata (all slabs are empty when used in trim).
|
||||
static inline int superslab_any_class_idx(SuperSlab* ss) {
|
||||
if (!ss) return -1;
|
||||
int cap = ss_slabs_capacity(ss);
|
||||
for (int s = 0; s < cap; s++) {
|
||||
uint8_t cls = ss->slabs[s].class_idx;
|
||||
if (cls < TINY_NUM_CLASSES) return (int)cls;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void hak_tiny_trim(void) {
|
||||
static _Atomic int g_trim_call_count = 0;
|
||||
int call_count = atomic_fetch_add_explicit(&g_trim_call_count, 1, memory_order_relaxed);
|
||||
@ -94,11 +106,12 @@ void hak_tiny_trim(void) {
|
||||
static _Atomic int g_debug_ss_scan = 0;
|
||||
int scan_count = atomic_fetch_add_explicit(&g_debug_ss_scan, 1, memory_order_relaxed);
|
||||
if (scan_count < 20) { // First 20 SS scans
|
||||
int log_cls = superslab_any_class_idx(ss);
|
||||
fprintf(stderr, "[DEBUG trim scan] ss=%p class=%d active=%u\n",
|
||||
(void*)ss, ss->size_class, active);
|
||||
(void*)ss, log_cls, active);
|
||||
}
|
||||
if (active != 0) continue;
|
||||
int k = ss->size_class;
|
||||
int k = superslab_any_class_idx(ss);
|
||||
if (k < 0 || k >= TINY_NUM_CLASSES) continue;
|
||||
// Do not free if current thread still caches this SS in TLS
|
||||
if (g_tls_slabs[k].ss == ss) continue;
|
||||
|
||||
@ -128,7 +128,8 @@ void hak_tiny_magazine_flush(int class_idx) {
|
||||
if (meta->used > 0) meta->used--;
|
||||
continue;
|
||||
}
|
||||
tiny_next_write(owner_ss->size_class, it.ptr, meta->freelist);
|
||||
uint8_t cls = (meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : (uint8_t)class_idx;
|
||||
tiny_next_write(cls, it.ptr, meta->freelist);
|
||||
meta->freelist = it.ptr;
|
||||
meta->used--;
|
||||
// Active was decremented at free time
|
||||
|
||||
@ -33,12 +33,9 @@ int hak_tiny_is_managed_superslab(void* ptr) {
|
||||
// Safety: Only check if g_use_superslab is enabled
|
||||
if (g_use_superslab) {
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
// Phase 8.2 optimization: Use alignment check instead of mincore()
|
||||
// SuperSlabs are always SUPERSLAB_SIZE-aligned (2MB)
|
||||
if (ss && ((uintptr_t)ss & (SUPERSLAB_SIZE - 1)) == 0) {
|
||||
if (ss->magic == SUPERSLAB_MAGIC) {
|
||||
return 1; // Valid SuperSlab pointer
|
||||
}
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
// Phase 12: Use registry hit as "managed by SuperSlab"
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -53,16 +50,20 @@ size_t hak_tiny_usable_size(void* ptr) {
|
||||
if (g_use_superslab) {
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
int k = (int)ss->size_class;
|
||||
if (k >= 0 && k < TINY_NUM_CLASSES) {
|
||||
// Phase E1-CORRECT: g_tiny_class_sizes = total size (stride)
|
||||
// Usable = stride - 1 (for 1-byte header)
|
||||
// Phase 12: derive class from per-slab meta (no ss->size_class)
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
int slab_idx = slab_index_for(ss, base);
|
||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
int k = (meta->class_idx < TINY_NUM_CLASSES) ? (int)meta->class_idx : -1;
|
||||
if (k >= 0) {
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
size_t stride = g_tiny_class_sizes[k];
|
||||
return (stride > 0) ? (stride - 1) : 0;
|
||||
size_t stride = g_tiny_class_sizes[k];
|
||||
return (stride > 0) ? (stride - 1) : 0;
|
||||
#else
|
||||
return g_tiny_class_sizes[k];
|
||||
return g_tiny_class_sizes[k];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -114,7 +114,7 @@ static inline void tiny_debug_validate_node_base(int class_idx, void* node, cons
|
||||
fprintf(stderr, "[SLL_NODE_UNKNOWN] %s: node=%p cls=%d\n", where, node, class_idx);
|
||||
abort();
|
||||
}
|
||||
int ocls = ss->size_class;
|
||||
int ocls = meta ? meta->class_idx : -1;
|
||||
if (ocls == 7 || ocls != class_idx) {
|
||||
fprintf(stderr, "[SLL_NODE_CLASS_MISMATCH] %s: node=%p cls=%d owner_cls=%d\n", where, node, class_idx, ocls);
|
||||
abort();
|
||||
@ -257,9 +257,6 @@ static inline int quick_refill_from_mag(int class_idx) {
|
||||
return take;
|
||||
}
|
||||
|
||||
// P0 optimization: Batch refill(A/Bテスト用ランタイムゲートで呼び分け)
|
||||
// - デフォルトはOFF(環境変数 HAKMEM_TINY_P0_ENABLE=1 で有効化)
|
||||
#include "hakmem_tiny_refill_p0.inc.h"
|
||||
|
||||
// Box 3 wrapper: verify linear carve stays within slab usable bytes (Fail-Fast)
|
||||
// DEPRECATED: Use tiny_carve_guard_verbose() from Box 3 directly
|
||||
@ -269,7 +266,9 @@ static inline int tiny_linear_carve_guard(TinyTLSSlab* tls,
|
||||
uint32_t reserve,
|
||||
const char* stage) {
|
||||
if (!tls || !meta) return 0;
|
||||
int class_idx = tls->ss ? tls->ss->size_class : -1;
|
||||
int class_idx = (tls->meta && tls->meta->class_idx < TINY_NUM_CLASSES)
|
||||
? (int)tls->meta->class_idx
|
||||
: -1;
|
||||
return tiny_carve_guard_verbose(stage,
|
||||
class_idx,
|
||||
tls->slab_idx,
|
||||
@ -282,174 +281,75 @@ static inline int tiny_linear_carve_guard(TinyTLSSlab* tls,
|
||||
|
||||
// Refill a few nodes directly into TLS SLL from TLS-cached SuperSlab (owner-thread only)
|
||||
// Note: If HAKMEM_TINY_P0_BATCH_REFILL is enabled, sll_refill_batch_from_ss is used instead
|
||||
#if !HAKMEM_TINY_P0_BATCH_REFILL
|
||||
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
|
||||
// Note: Force non-inline to provide linkable definition for LTO
|
||||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||||
__attribute__((noinline)) int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
#else
|
||||
static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
|
||||
#endif
|
||||
// PRIORITY 1: Bounds check before TLS array access
|
||||
HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss");
|
||||
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
||||
|
||||
// Phase E1-CORRECT: C7 now has headers, can use small refill
|
||||
if (!g_use_superslab || max_take <= 0)
|
||||
return 0;
|
||||
|
||||
if (!g_use_superslab || max_take <= 0) return 0;
|
||||
// ランタイムA/B: P0を有効化している場合はバッチrefillへ委譲
|
||||
do {
|
||||
// 既定: OFF(HAKMEM_TINY_P0_ENABLE=1 で有効化)
|
||||
static int g_p0_enable = -1;
|
||||
if (__builtin_expect(g_p0_enable == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_P0_ENABLE");
|
||||
// 環境変数が'1'のときだけ有効、それ以外(未設定含む)は無効
|
||||
g_p0_enable = (e && *e && *e == '1') ? 1 : 0;
|
||||
}
|
||||
if (__builtin_expect(g_p0_enable, 0)) {
|
||||
return sll_refill_batch_from_ss(class_idx, max_take);
|
||||
}
|
||||
} while (0);
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
if (!tls->ss) {
|
||||
// Try to obtain a SuperSlab for this class
|
||||
if (superslab_refill(class_idx) == NULL) return 0;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
if (!tls->ss || !tls->meta || tls->meta->class_idx != (uint8_t)class_idx) {
|
||||
if (!superslab_refill(class_idx))
|
||||
return 0;
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
if (!tls->ss || !tls->meta || tls->meta->class_idx != (uint8_t)class_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
if (!meta) return 0;
|
||||
|
||||
// Class 4/5/6/7 special-case: simple batch refill (favor linear carve, minimal branching)
|
||||
// Optional gate for class3 via env: HAKMEM_TINY_SIMPLE_REFILL_C3=1
|
||||
static int g_simple_c3 = -1;
|
||||
if (__builtin_expect(g_simple_c3 == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_SIMPLE_REFILL_C3");
|
||||
g_simple_c3 = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (__builtin_expect(class_idx >= 4 || (class_idx == 3 && g_simple_c3), 0)) {
|
||||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||||
int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
|
||||
if (room <= 0) return 0;
|
||||
int take = max_take < room ? max_take : room;
|
||||
int taken = 0;
|
||||
// Box 3: Get stride (block size + header, except C7 which is headerless)
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
for (; taken < take;) {
|
||||
// Linear first (LIKELY for class7)
|
||||
if (__builtin_expect(meta->freelist == NULL && meta->carved < meta->capacity, 1)) {
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, 1, "simple"), 0)) {
|
||||
abort();
|
||||
}
|
||||
// Box 3: Get slab base (handles Slab 0 offset)
|
||||
uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
void* p = tiny_block_at_index(base, meta->carved, bs);
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
|
||||
// Phase E1-CORRECT: Restore header BEFORE tls_sll_push
|
||||
// ROOT CAUSE: Simple refill path carves blocks but doesn't write headers.
|
||||
// tls_sll_push() expects headers at base to write next at base+1.
|
||||
// ALL classes (including C7) need headers restored!
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||||
#endif
|
||||
|
||||
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
|
||||
if (!tls_sll_push(class_idx, p, sll_cap)) {
|
||||
// SLL full (should not happen, room was checked)
|
||||
meta->used--; meta->carved--; // Rollback
|
||||
break;
|
||||
}
|
||||
ss_active_inc(tls->ss);
|
||||
taken++;
|
||||
continue;
|
||||
}
|
||||
// Freelist fallback
|
||||
if (__builtin_expect(meta->freelist != NULL, 0)) {
|
||||
void* p = meta->freelist;
|
||||
// BUG FIX: Use Box API to read next pointer at correct offset
|
||||
void* next = tiny_next_read(class_idx, p);
|
||||
meta->freelist = next;
|
||||
meta->used++;
|
||||
|
||||
// Phase E1-CORRECT: Restore header BEFORE tls_sll_push
|
||||
// Freelist stores next at base (offset 0), overwriting header.
|
||||
// Must restore header so tls_sll_push can write next at base+1 correctly.
|
||||
// ALL classes (including C7) need headers restored!
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||||
#endif
|
||||
|
||||
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
|
||||
if (!tls_sll_push(class_idx, p, sll_cap)) {
|
||||
// SLL full (should not happen, room was checked)
|
||||
// BUG FIX: Use Box API to write rollback next pointer
|
||||
tiny_next_write(class_idx, p, next); // Rollback freelist
|
||||
meta->freelist = p;
|
||||
meta->used--;
|
||||
break;
|
||||
}
|
||||
ss_active_inc(tls->ss);
|
||||
taken++;
|
||||
continue;
|
||||
}
|
||||
// Need another slab with space
|
||||
if (__builtin_expect(superslab_refill(class_idx) == NULL, 0)) break;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
meta = tls->meta; // refresh after refill
|
||||
}
|
||||
return taken;
|
||||
}
|
||||
|
||||
// Compute how many we can actually push into SLL without overflow
|
||||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||||
int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
|
||||
if (room <= 0) return 0;
|
||||
int take = max_take < room ? max_take : room;
|
||||
if (room <= 0)
|
||||
return 0;
|
||||
|
||||
int take = max_take < room ? max_take : room;
|
||||
int taken = 0;
|
||||
// Box 3: Get stride (block size + header, except C7 which is headerless)
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
|
||||
while (taken < take) {
|
||||
void* p = NULL;
|
||||
if (__builtin_expect(meta->freelist != NULL, 0)) {
|
||||
// BUG FIX: Use Box API to read next pointer at correct offset
|
||||
p = meta->freelist; meta->freelist = tiny_next_read(class_idx, p); meta->used++;
|
||||
// Track active blocks reserved into TLS SLL
|
||||
|
||||
if (meta->freelist) {
|
||||
p = meta->freelist;
|
||||
meta->freelist = tiny_next_read(class_idx, p);
|
||||
meta->used++;
|
||||
ss_active_inc(tls->ss);
|
||||
} else if (__builtin_expect(meta->carved < meta->capacity, 1)) {
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, 1, "general"), 0)) {
|
||||
} else if (meta->carved < meta->capacity) {
|
||||
if (!tiny_linear_carve_guard(tls, meta, bs, 1, "sll_refill_small"))
|
||||
abort();
|
||||
}
|
||||
// Box 3: Get slab base and calculate block address
|
||||
uint8_t* slab_start = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
p = tiny_block_at_index(slab_start, meta->carved, bs);
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
// Track active blocks reserved into TLS SLL
|
||||
ss_active_inc(tls->ss);
|
||||
} else {
|
||||
// Move to another slab with space
|
||||
if (superslab_refill(class_idx) == NULL) break;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
if (!superslab_refill(class_idx))
|
||||
break;
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
meta = tls->meta; // refresh after refill
|
||||
meta = tls->meta;
|
||||
if (!tls->ss || !meta || meta->class_idx != (uint8_t)class_idx)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
if (!p) break;
|
||||
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
|
||||
|
||||
if (!p)
|
||||
break;
|
||||
|
||||
if (!tls_sll_push(class_idx, p, sll_cap)) {
|
||||
// SLL full (should not happen, room was checked)
|
||||
// Rollback: need to return block to meta (complex, just break)
|
||||
// SLL full; stop without complex rollback.
|
||||
break;
|
||||
}
|
||||
|
||||
taken++;
|
||||
}
|
||||
|
||||
return taken;
|
||||
}
|
||||
#endif // !HAKMEM_TINY_P0_BATCH_REFILL
|
||||
|
||||
// Ultra-Bump TLS shadow try: returns pointer when a TLS bump window is armed
|
||||
// or can be armed by reserving a small chunk from the current SuperSlab meta.
|
||||
@ -499,7 +399,7 @@ static inline void* superslab_tls_bump_fast(int class_idx) {
|
||||
uint32_t chunk = (g_bump_chunk > 0 ? (uint32_t)g_bump_chunk : 1u);
|
||||
if (chunk > avail) chunk = avail;
|
||||
// Box 3: Get stride and slab base
|
||||
size_t bs = tiny_stride_for_class(tls->ss->size_class);
|
||||
size_t bs = tiny_stride_for_class(tls->meta ? tls->meta->class_idx : 0);
|
||||
uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
if (__builtin_expect(!tiny_linear_carve_guard(tls, meta, bs, chunk, "tls_bump"), 0)) {
|
||||
abort();
|
||||
|
||||
@ -1,39 +1,29 @@
|
||||
// hakmem_tiny_refill_p0.inc.h
|
||||
// ChatGPT Pro P0: Complete Batch Refill (SLL用)
|
||||
//
|
||||
// Purpose: Optimize sll_refill_small_from_ss with batch carving
|
||||
// Based on: tls_refill_from_tls_slab (hakmem_tiny_tls_ops.h:115-126)
|
||||
//
|
||||
// Key optimization: ss_active_inc × 64 → ss_active_add × 1
|
||||
//
|
||||
// Maintains: Existing g_tls_sll_head fast path (no changes to hot path!)
|
||||
//
|
||||
// Enable P0 by default for testing (set to 0 to disable)
|
||||
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
|
||||
#define HAKMEM_TINY_P0_BATCH_REFILL 0
|
||||
#endif
|
||||
|
||||
#ifndef HAKMEM_TINY_REFILL_P0_INC_H
|
||||
#define HAKMEM_TINY_REFILL_P0_INC_H
|
||||
|
||||
#include "tiny_box_geometry.h" // Box 3: Geometry & Capacity Calculator
|
||||
// hakmem_tiny_refill_p0.inc.h
|
||||
// P0: Batch refill implementation (sll_refill_batch_from_ss only).
|
||||
// Phase 12: DO NOT alias or redefine sll_refill_small_from_ss here.
|
||||
// NOTE: This file is active only when HAKMEM_TINY_P0_BATCH_REFILL=1.
|
||||
|
||||
#if HAKMEM_TINY_P0_BATCH_REFILL
|
||||
|
||||
#include "tiny_box_geometry.h" // Box 3: Geometry & Capacity Calculator
|
||||
#include "tiny_refill_opt.h"
|
||||
#include "tiny_fc_api.h"
|
||||
#include "superslab/superslab_inline.h" // For _ss_remote_drain_to_freelist_unsafe()
|
||||
#include "box/integrity_box.h" // Box I: Integrity verification (Priority ALPHA)
|
||||
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
|
||||
|
||||
// Debug counters (compile-time gated)
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
extern unsigned long long g_rf_hit_slab[];
|
||||
// Diagnostic counters for refill early returns
|
||||
extern unsigned long long g_rf_early_no_ss[]; // Line 27: !g_use_superslab
|
||||
extern unsigned long long g_rf_early_no_meta[]; // Line 35: !meta
|
||||
extern unsigned long long g_rf_early_no_room[]; // Line 40: room <= 0
|
||||
extern unsigned long long g_rf_early_want_zero[]; // Line 55: want == 0
|
||||
extern unsigned long long g_rf_early_no_ss[];
|
||||
extern unsigned long long g_rf_early_no_meta[];
|
||||
extern unsigned long long g_rf_early_no_room[];
|
||||
extern unsigned long long g_rf_early_want_zero[];
|
||||
#endif
|
||||
|
||||
// Refill TLS SLL from SuperSlab with batch carving (P0 optimization)
|
||||
#include "tiny_refill_opt.h"
|
||||
#include "tiny_fc_api.h"
|
||||
#include "superslab/superslab_inline.h" // For _ss_remote_drain_to_freelist_unsafe()
|
||||
#include "box/integrity_box.h" // Box I: Integrity verification (Priority ALPHA)
|
||||
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
|
||||
// Optional P0 diagnostic logging helper
|
||||
static inline int p0_should_log(void) {
|
||||
static int en = -1;
|
||||
@ -44,6 +34,7 @@ static inline int p0_should_log(void) {
|
||||
return en;
|
||||
}
|
||||
|
||||
// P0 batch refill entry point
|
||||
static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
// Phase E1-CORRECT: C7 now has headers, can use P0 batch refill
|
||||
|
||||
@ -58,6 +49,7 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
return 0;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
if (!g_use_superslab || max_take <= 0) {
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
if (!g_use_superslab) g_rf_early_no_ss[class_idx]++;
|
||||
@ -71,25 +63,12 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
active_before = atomic_load_explicit(&tls->ss->total_active_blocks, memory_order_relaxed);
|
||||
}
|
||||
|
||||
// CRITICAL DEBUG: Log class 7 pre-warm
|
||||
if (__builtin_expect(class_idx == 7 && p0_should_log(), 0)) {
|
||||
fprintf(stderr, "[P0_DEBUG_C7] Entry: tls->ss=%p tls->meta=%p max_take=%d\n",
|
||||
(void*)tls->ss, (void*)tls->meta, max_take);
|
||||
}
|
||||
|
||||
if (!tls->ss) {
|
||||
// Try to obtain a SuperSlab for this class
|
||||
if (superslab_refill(class_idx) == NULL) {
|
||||
if (__builtin_expect(class_idx == 7 && p0_should_log(), 0)) {
|
||||
fprintf(stderr, "[P0_DEBUG_C7] superslab_refill() returned NULL\n");
|
||||
}
|
||||
if (!superslab_refill(class_idx)) {
|
||||
return 0;
|
||||
}
|
||||
if (__builtin_expect(class_idx == 7 && p0_should_log(), 0)) {
|
||||
fprintf(stderr, "[P0_DEBUG_C7] After superslab_refill(): tls->ss=%p tls->meta=%p\n",
|
||||
(void*)tls->ss, (void*)tls->meta);
|
||||
}
|
||||
}
|
||||
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
if (!meta) {
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
@ -98,48 +77,38 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* BOX_BOUNDARY: Box 2 (Refill) → Box I (Integrity Check) */
|
||||
#if HAKMEM_INTEGRITY_LEVEL >= 4
|
||||
uint8_t* initial_slab_base = tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
SlabMetadataState meta_initial = integrity_capture_slab_metadata(meta, initial_slab_base, class_idx);
|
||||
#if HAKMEM_INTEGRITY_LEVEL >= 4
|
||||
uint8_t* initial_slab_base =
|
||||
tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
SlabMetadataState meta_initial =
|
||||
integrity_capture_slab_metadata(meta, initial_slab_base, class_idx);
|
||||
INTEGRITY_CHECK_SLAB_METADATA(meta_initial, "P0 refill entry");
|
||||
#endif
|
||||
/* BOX_BOUNDARY: Box I → Box 2 (Integrity Verified) */
|
||||
#endif
|
||||
|
||||
if (!meta) {
|
||||
if (__builtin_expect(class_idx == 7 && p0_should_log(), 0)) {
|
||||
fprintf(stderr, "[P0_DEBUG_C7] meta is NULL after superslab_refill, returning 0\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Optional: Direct-FC fast path for class 5 (256B) / class 7 (1024B)
|
||||
// env:
|
||||
// - HAKMEM_TINY_P0_DIRECT_FC (default ON for class5)
|
||||
// - HAKMEM_TINY_P0_DIRECT_FC_C7 (default OFF for class7)
|
||||
// Optional: Direct-FC fast path (kept as-is from original P0, no aliasing)
|
||||
do {
|
||||
static int g_direct_fc = -1;
|
||||
static int g_direct_fc_c7 = -1;
|
||||
if (__builtin_expect(g_direct_fc == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_P0_DIRECT_FC");
|
||||
// Default ON when unset
|
||||
g_direct_fc = (e && *e && *e == '0') ? 0 : 1;
|
||||
}
|
||||
if (__builtin_expect(g_direct_fc_c7 == -1, 0)) {
|
||||
const char* e7 = getenv("HAKMEM_TINY_P0_DIRECT_FC_C7");
|
||||
// Default OFF for class7 (1KB) until stability is fully verified; opt-in via env
|
||||
g_direct_fc_c7 = (e7 && *e7) ? ((*e7 == '0') ? 0 : 1) : 0;
|
||||
}
|
||||
if (__builtin_expect((g_direct_fc && class_idx == 5) || (g_direct_fc_c7 && class_idx == 7), 0)) {
|
||||
if (__builtin_expect((g_direct_fc && class_idx == 5) ||
|
||||
(g_direct_fc_c7 && class_idx == 7), 0)) {
|
||||
int room = tiny_fc_room(class_idx);
|
||||
if (room <= 0) return 0;
|
||||
// Drain only if above threshold
|
||||
uint32_t rmt = atomic_load_explicit(&tls->ss->remote_counts[tls->slab_idx], memory_order_relaxed);
|
||||
|
||||
uint32_t rmt = atomic_load_explicit(
|
||||
&tls->ss->remote_counts[tls->slab_idx], memory_order_relaxed);
|
||||
static int g_drain_th = -1;
|
||||
if (__builtin_expect(g_drain_th == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_P0_DRAIN_THRESH");
|
||||
g_drain_th = (e && *e) ? atoi(e) : 64;
|
||||
if (g_drain_th < 0) g_drain_th = 0;
|
||||
int v = (e && *e) ? atoi(e) : 64;
|
||||
g_drain_th = (v < 0) ? 0 : v;
|
||||
}
|
||||
if (rmt >= (uint32_t)g_drain_th) {
|
||||
static int no_drain = -1;
|
||||
@ -148,54 +117,47 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
no_drain = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (!no_drain) {
|
||||
_ss_remote_drain_to_freelist_unsafe(tls->ss, tls->slab_idx, tls->meta);
|
||||
_ss_remote_drain_to_freelist_unsafe(
|
||||
tls->ss, tls->slab_idx, tls->meta);
|
||||
}
|
||||
}
|
||||
// Gather pointers without writing into objects
|
||||
void* out[128]; int produced = 0;
|
||||
|
||||
void* out[128];
|
||||
int produced = 0;
|
||||
TinySlabMeta* m = tls->meta;
|
||||
// Box 3: Get stride (block size + header, except C7 which is headerless)
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
uint8_t* base = tls->slab_base
|
||||
? tls->slab_base
|
||||
: tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
while (produced < room) {
|
||||
if (__builtin_expect(m->freelist != NULL, 0)) {
|
||||
// Phase E1-CORRECT: Use Box API for freelist next pointer read
|
||||
void* p = m->freelist; m->freelist = tiny_next_read(class_idx, p); m->used++;
|
||||
if (m->freelist) {
|
||||
void* p = m->freelist;
|
||||
m->freelist = tiny_next_read(class_idx, p);
|
||||
m->used++;
|
||||
out[produced++] = p;
|
||||
continue;
|
||||
}
|
||||
if (__builtin_expect(m->carved < m->capacity, 1)) {
|
||||
} else if (m->carved < m->capacity) {
|
||||
void* p = (void*)(base + ((size_t)m->carved * bs));
|
||||
m->carved++; m->used++;
|
||||
m->carved++;
|
||||
m->used++;
|
||||
out[produced++] = p;
|
||||
continue;
|
||||
} else {
|
||||
if (!superslab_refill(class_idx)) break;
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
m = tls->meta;
|
||||
base = tls->slab_base
|
||||
? tls->slab_base
|
||||
: tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
}
|
||||
// Need to move to another slab with space
|
||||
if (__builtin_expect(superslab_refill(class_idx) == NULL, 0)) break;
|
||||
// Rebind
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
m = tls->meta;
|
||||
base = tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
}
|
||||
if (produced > 0) {
|
||||
ss_active_add(tls->ss, (uint32_t)produced);
|
||||
int pushed = tiny_fc_push_bulk(class_idx, out, produced);
|
||||
(void)pushed; // roomに合わせているので一致するはず
|
||||
if (p0_should_log()) {
|
||||
static _Atomic int g_logged = 0;
|
||||
int exp = 0;
|
||||
if (atomic_compare_exchange_strong(&g_logged, &exp, 1)) {
|
||||
fprintf(stderr, "[P0_DIRECT_FC_TAKE] cls=%d take=%d room=%d drain_th=%d remote_cnt=%u\n",
|
||||
class_idx, produced, room, g_drain_th, rmt);
|
||||
}
|
||||
}
|
||||
(void)tiny_fc_push_bulk(class_idx, out, produced);
|
||||
return produced;
|
||||
}
|
||||
// fallthrough to regular path
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// Compute how many we can actually push into SLL without overflow
|
||||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||||
int room = (int)sll_cap - (int)g_tls_sll_count[class_idx];
|
||||
if (room <= 0) {
|
||||
@ -205,28 +167,7 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// For hot tiny classes (0..3), allow an env override to increase batch size
|
||||
uint32_t want = (uint32_t)max_take;
|
||||
if (class_idx <= 3) {
|
||||
static int g_hot_override = -2; // -2 = uninitialized, -1 = no override, >0 = value
|
||||
if (__builtin_expect(g_hot_override == -2, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_REFILL_COUNT_HOT");
|
||||
int v = (e && *e) ? atoi(e) : -1;
|
||||
if (v < 0) v = -1; if (v > 256) v = 256; // clamp
|
||||
g_hot_override = v;
|
||||
}
|
||||
if (g_hot_override > 0) want = (uint32_t)g_hot_override;
|
||||
} else {
|
||||
// Mid classes (>=4): optional override for batch size
|
||||
static int g_mid_override = -2; // -2 = uninitialized, -1 = no override, >0 = value
|
||||
if (__builtin_expect(g_mid_override == -2, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_REFILL_COUNT_MID");
|
||||
int v = (e && *e) ? atoi(e) : -1;
|
||||
if (v < 0) v = -1; if (v > 256) v = 256; // clamp
|
||||
g_mid_override = v;
|
||||
}
|
||||
if (g_mid_override > 0) want = (uint32_t)g_mid_override;
|
||||
}
|
||||
if (want > (uint32_t)room) want = (uint32_t)room;
|
||||
if (want == 0) {
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
@ -235,31 +176,23 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Box 3: Get stride (block size + header, except C7 which is headerless)
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
int total_taken = 0;
|
||||
|
||||
// === P0 Batch Carving Loop ===
|
||||
while (want > 0) {
|
||||
// Calculate slab base for validation (accounts for 2048 offset in slab 0)
|
||||
uintptr_t ss_base = 0;
|
||||
uintptr_t ss_limit = 0;
|
||||
if (tls->ss && tls->slab_idx >= 0) {
|
||||
// Box 3: Get slab base (handles Slab 0 offset)
|
||||
uint8_t* slab_base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
uint8_t* slab_base =
|
||||
tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
ss_base = (uintptr_t)slab_base;
|
||||
// Box 3: Get usable bytes for limit calculation
|
||||
ss_limit = ss_base + tiny_usable_bytes_for_slab(tls->slab_idx);
|
||||
}
|
||||
|
||||
// CRITICAL FIX: Drain remote queue BEFORE popping from freelist
|
||||
// Without this, blocks in both freelist and remote queue can be double-allocated
|
||||
// (Thread A pops from freelist, Thread B adds to remote queue, Thread A drains remote → overwrites user data)
|
||||
// OPTIMIZATION: Only drain if remote queue is non-empty (check atomic counter)
|
||||
if (tls->ss && tls->slab_idx >= 0) {
|
||||
uint32_t remote_count = atomic_load_explicit(&tls->ss->remote_counts[tls->slab_idx], memory_order_relaxed);
|
||||
uint32_t remote_count = atomic_load_explicit(
|
||||
&tls->ss->remote_counts[tls->slab_idx], memory_order_relaxed);
|
||||
if (remote_count > 0) {
|
||||
// Runtime A/B: allow skipping remote drain for切り分け
|
||||
static int no_drain = -1;
|
||||
if (__builtin_expect(no_drain == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_P0_NO_DRAIN");
|
||||
@ -271,51 +204,30 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
}
|
||||
}
|
||||
|
||||
// Handle freelist items first (usually 0)
|
||||
TinyRefillChain chain;
|
||||
uint32_t from_freelist = trc_pop_from_freelist(
|
||||
meta, class_idx, ss_base, ss_limit, bs, want, &chain);
|
||||
if (from_freelist > 0) {
|
||||
trc_splice_to_sll(class_idx, &chain, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]);
|
||||
// FIX: Blocks from freelist were decremented when freed, must increment when allocated
|
||||
trc_splice_to_sll(
|
||||
class_idx, &chain,
|
||||
&g_tls_sll_head[class_idx],
|
||||
&g_tls_sll_count[class_idx]);
|
||||
ss_active_add(tls->ss, from_freelist);
|
||||
// FIX: Keep TinySlabMeta::used consistent with non-P0 path
|
||||
meta->used = (uint16_t)((uint32_t)meta->used + from_freelist);
|
||||
|
||||
/* BOX_BOUNDARY: Box 2 → Box I (Verify metadata after freelist pop) */
|
||||
#if HAKMEM_INTEGRITY_LEVEL >= 4
|
||||
SlabMetadataState meta_after_freelist = integrity_capture_slab_metadata(
|
||||
meta, ss_base, class_idx);
|
||||
INTEGRITY_CHECK_SLAB_METADATA(meta_after_freelist, "P0 after freelist pop");
|
||||
#endif
|
||||
/* BOX_BOUNDARY: Box I → Box 2 */
|
||||
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
extern unsigned long long g_rf_freelist_items[];
|
||||
g_rf_freelist_items[class_idx] += from_freelist;
|
||||
#endif
|
||||
total_taken += from_freelist;
|
||||
want -= from_freelist;
|
||||
if (want == 0) break;
|
||||
}
|
||||
|
||||
// === Linear Carve (P0 Key Optimization!) ===
|
||||
// Use monotonic 'carved' to track linear progression (used can decrement on free)
|
||||
if (meta->carved >= meta->capacity) {
|
||||
// Slab exhausted, try to get another
|
||||
if (superslab_refill(class_idx) == NULL) break;
|
||||
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
|
||||
if (!superslab_refill(class_idx)) break;
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
meta = tls->meta;
|
||||
if (!meta) break;
|
||||
|
||||
/* BOX_BOUNDARY: Box 2 → Box I (Verify new slab after superslab_refill) */
|
||||
#if HAKMEM_INTEGRITY_LEVEL >= 4
|
||||
uint8_t* new_slab_base = tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
SlabMetadataState meta_after_refill = integrity_capture_slab_metadata(
|
||||
meta, new_slab_base, class_idx);
|
||||
INTEGRITY_CHECK_SLAB_METADATA(meta_after_refill, "P0 after superslab_refill");
|
||||
#endif
|
||||
/* BOX_BOUNDARY: Box I → Box 2 */
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -324,93 +236,41 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
|
||||
if (batch > available) batch = available;
|
||||
if (batch == 0) break;
|
||||
|
||||
// Get slab base
|
||||
uint8_t* slab_base = tls->slab_base ? tls->slab_base
|
||||
: tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
|
||||
// Diagnostic log (one-shot)
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static _Atomic int g_carve_log_printed = 0;
|
||||
if (atomic_load(&g_carve_log_printed) == 0 &&
|
||||
atomic_exchange(&g_carve_log_printed, 1) == 0) {
|
||||
fprintf(stderr, "[BATCH_CARVE] cls=%u slab=%d used=%u cap=%u batch=%u base=%p bs=%zu\n",
|
||||
class_idx, tls->slab_idx, meta->used, meta->capacity, batch,
|
||||
(void*)slab_base, bs);
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
uint8_t* slab_base = tls->slab_base
|
||||
? tls->slab_base
|
||||
: tiny_slab_base_for(tls->ss, tls->slab_idx);
|
||||
|
||||
TinyRefillChain carve;
|
||||
trc_linear_carve(slab_base, bs, meta, batch, class_idx, &carve);
|
||||
|
||||
// One-shot sanity: validate first few nodes are within the slab and stride-aligned
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
do {
|
||||
static _Atomic int g_once = 0;
|
||||
int exp = 0;
|
||||
if (atomic_compare_exchange_strong(&g_once, &exp, 1)) {
|
||||
uintptr_t base_chk = (uintptr_t)(tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx));
|
||||
uintptr_t limit_chk = base_chk + tiny_usable_bytes_for_slab(tls->slab_idx);
|
||||
void* node = carve.head;
|
||||
for (int i = 0; i < 3 && node; i++) {
|
||||
uintptr_t a = (uintptr_t)node;
|
||||
if (!(a >= base_chk && a < limit_chk)) {
|
||||
fprintf(stderr, "[P0_SANITY_FAIL] out_of_range cls=%d node=%p base=%p limit=%p bs=%zu\n",
|
||||
class_idx, node, (void*)base_chk, (void*)limit_chk, bs);
|
||||
abort();
|
||||
}
|
||||
size_t off = (size_t)(a - base_chk);
|
||||
if ((off % bs) != 0) {
|
||||
fprintf(stderr, "[P0_SANITY_FAIL] misaligned cls=%d node=%p off=%zu bs=%zu base=%p\n",
|
||||
class_idx, node, off, bs, (void*)base_chk);
|
||||
abort();
|
||||
}
|
||||
node = tiny_next_read(class_idx, node);
|
||||
}
|
||||
}
|
||||
} while (0);
|
||||
#endif
|
||||
trc_splice_to_sll(class_idx, &carve, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]);
|
||||
// FIX: Update SuperSlab active counter (was missing!)
|
||||
trc_splice_to_sll(
|
||||
class_idx, &carve,
|
||||
&g_tls_sll_head[class_idx],
|
||||
&g_tls_sll_count[class_idx]);
|
||||
ss_active_add(tls->ss, batch);
|
||||
|
||||
/* BOX_BOUNDARY: Box 2 → Box I (Verify metadata after linear carve) */
|
||||
#if HAKMEM_INTEGRITY_LEVEL >= 4
|
||||
SlabMetadataState meta_after_carve = integrity_capture_slab_metadata(
|
||||
meta, slab_base, class_idx);
|
||||
INTEGRITY_CHECK_SLAB_METADATA(meta_after_carve, "P0 after linear carve");
|
||||
#endif
|
||||
/* BOX_BOUNDARY: Box I → Box 2 */
|
||||
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
extern unsigned long long g_rf_carve_items[];
|
||||
g_rf_carve_items[class_idx] += batch;
|
||||
|
||||
#endif
|
||||
total_taken += batch;
|
||||
want -= batch;
|
||||
}
|
||||
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
// Track successful SLL refills from SuperSlab (compile-time gated)
|
||||
// NOTE: Increment unconditionally to verify counter is working
|
||||
g_rf_hit_slab[class_idx]++;
|
||||
#endif
|
||||
|
||||
if (tls->ss && p0_should_log()) {
|
||||
uint32_t active_after = atomic_load_explicit(&tls->ss->total_active_blocks, memory_order_relaxed);
|
||||
int32_t delta = (int32_t)active_after - (int32_t)active_before;
|
||||
if ((int32_t)total_taken != delta) {
|
||||
fprintf(stderr,
|
||||
"[P0_COUNTER_MISMATCH] cls=%d slab=%d taken=%d active_delta=%d used=%u carved=%u cap=%u freelist=%p\n",
|
||||
class_idx, tls->slab_idx, total_taken, delta,
|
||||
(unsigned)meta->used, (unsigned)meta->carved, (unsigned)meta->capacity,
|
||||
meta->freelist);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"[P0_COUNTER_OK] cls=%d slab=%d taken=%d active_delta=%d\n",
|
||||
class_idx, tls->slab_idx, total_taken, delta);
|
||||
}
|
||||
uint32_t active_after = atomic_load_explicit(
|
||||
&tls->ss->total_active_blocks, memory_order_relaxed);
|
||||
int32_t delta =
|
||||
(int32_t)active_after - (int32_t)active_before;
|
||||
fprintf(stderr,
|
||||
"[P0_COUNTER] cls=%d slab=%d taken=%d active_delta=%d\n",
|
||||
class_idx, tls->slab_idx, total_taken, delta);
|
||||
}
|
||||
|
||||
return total_taken;
|
||||
}
|
||||
|
||||
#endif // HAKMEM_TINY_P0_BATCH_REFILL
|
||||
#endif // HAKMEM_TINY_REFILL_P0_INC_H
|
||||
|
||||
@ -467,10 +467,9 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize SuperSlab header (Phase 1 Quick Win: removed memset for lazy init)
|
||||
// Initialize SuperSlab header (Phase 12: no global size_class field)
|
||||
SuperSlab* ss = (SuperSlab*)ptr;
|
||||
ss->magic = SUPERSLAB_MAGIC;
|
||||
ss->size_class = size_class;
|
||||
ss->active_slabs = 0;
|
||||
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
|
||||
ss->slab_bitmap = 0;
|
||||
@ -505,7 +504,7 @@ SuperSlab* superslab_allocate(uint8_t size_class) {
|
||||
ss->slabs[i].freelist = NULL; // Explicit NULL (redundant after memset, but clear intent)
|
||||
ss->slabs[i].used = 0;
|
||||
ss->slabs[i].capacity = 0;
|
||||
ss->slabs[i].owner_tid = 0;
|
||||
ss->slabs[i].owner_tid_low = 0;
|
||||
|
||||
// Initialize remote queue atomics (memset already zeroed, but use proper atomic init)
|
||||
atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
|
||||
@ -726,8 +725,8 @@ void superslab_free(SuperSlab* ss) {
|
||||
return;
|
||||
}
|
||||
|
||||
// LRU cache full or disabled - try old cache
|
||||
int old_cached = ss_cache_push(ss->size_class, ss);
|
||||
// LRU cache full or disabled - try old cache using head class_idx (if known)
|
||||
int old_cached = ss_cache_push(0, ss);
|
||||
if (old_cached) {
|
||||
ss_stats_cache_store();
|
||||
return;
|
||||
@ -738,8 +737,8 @@ void superslab_free(SuperSlab* ss) {
|
||||
ss->magic = 0;
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p class=%d size=%zu active=%u (LRU full)\n",
|
||||
(void*)ss, ss->size_class, ss_size,
|
||||
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
|
||||
(void*)ss, ss_size,
|
||||
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
|
||||
#endif
|
||||
|
||||
@ -748,9 +747,7 @@ void superslab_free(SuperSlab* ss) {
|
||||
// Update statistics for actual release to OS
|
||||
pthread_mutex_lock(&g_superslab_lock);
|
||||
g_superslabs_freed++;
|
||||
if (ss->size_class < 8) {
|
||||
g_ss_freed_by_class[ss->size_class]++;
|
||||
}
|
||||
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
|
||||
g_bytes_allocated -= ss_size;
|
||||
pthread_mutex_unlock(&g_superslab_lock);
|
||||
|
||||
@ -782,8 +779,8 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
|
||||
size_t stride = block_size;
|
||||
int capacity = (int)(usable_size / stride);
|
||||
|
||||
// Diagnostic: Verify capacity for class 7 slab 0 (one-shot)
|
||||
if (ss->size_class == 7 && slab_idx == 0) {
|
||||
// Diagnostic: Verify capacity for slab 0 of class 7 (one-shot)
|
||||
if (slab_idx == 0) {
|
||||
static _Atomic int g_cap_log_printed = 0;
|
||||
if (atomic_load(&g_cap_log_printed) == 0 &&
|
||||
atomic_exchange(&g_cap_log_printed, 1) == 0) {
|
||||
@ -808,8 +805,9 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
|
||||
meta->freelist = NULL; // NULL = linear allocation mode
|
||||
meta->used = 0;
|
||||
meta->capacity = (uint16_t)capacity;
|
||||
meta->carved = 0; // FIX: Initialize carved counter (monotonic carve progress)
|
||||
meta->owner_tid = (uint16_t)owner_tid; // FIX: Cast to uint16_t (changed from uint32_t)
|
||||
meta->carved = 0; // Initialize carved counter
|
||||
meta->owner_tid_low = (uint8_t)(owner_tid & 0xFFu);
|
||||
// Caller (refill) is responsible for setting meta->class_idx
|
||||
|
||||
// Store slab_start in SuperSlab for later use
|
||||
// (We need this for linear allocation)
|
||||
@ -872,15 +870,16 @@ void superslab_print_stats(SuperSlab* ss) {
|
||||
|
||||
printf("=== SuperSlab Stats ===\n");
|
||||
printf("Address: %p\n", (void*)ss);
|
||||
printf("Size class: %u\n", ss->size_class);
|
||||
// Phase 12: per-SS size_class removed; classes are per-slab via meta->class_idx.
|
||||
printf("Active slabs: %u / %d\n", ss->active_slabs, ss_slabs_capacity(ss));
|
||||
printf("Bitmap: 0x%08X\n", ss->slab_bitmap);
|
||||
printf("\nPer-slab details:\n");
|
||||
for (int i = 0; i < ss_slabs_capacity(ss); i++) {
|
||||
if (ss->slab_bitmap & (1u << i)) {
|
||||
TinySlabMeta* meta = &ss->slabs[i];
|
||||
printf(" Slab %2d: used=%u/%u freelist=%p owner=%u\n",
|
||||
i, meta->used, meta->capacity, meta->freelist, meta->owner_tid);
|
||||
printf(" Slab %2d: used=%u/%u freelist=%p class=%u owner_tid_low=%u\n",
|
||||
i, meta->used, meta->capacity, meta->freelist,
|
||||
(unsigned)meta->class_idx, (unsigned)meta->owner_tid_low);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
@ -1016,7 +1015,7 @@ static void ace_observe_and_decide(int k) {
|
||||
|
||||
// Phase 8.4: Safety check - skip if ss pointer is invalid
|
||||
if (!e->ss) continue;
|
||||
if (e->ss->size_class != k) continue; // Wrong class
|
||||
// Phase 12: per-SS size_class removed; registry entries are per-class by construction.
|
||||
|
||||
ss_count++;
|
||||
// Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead)
|
||||
|
||||
@ -62,33 +62,12 @@ static inline size_t tiny_block_stride_for_class(int class_idx) {
|
||||
return bs;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 2a: Dynamic Expansion - Global per-class SuperSlabHeads
|
||||
// ============================================================================
|
||||
|
||||
extern SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS];
|
||||
|
||||
// ============================================================================
|
||||
// SuperSlab Management Functions
|
||||
// ============================================================================
|
||||
|
||||
// Allocate a new SuperSlab (2MB aligned)
|
||||
SuperSlab* superslab_allocate(uint8_t size_class);
|
||||
|
||||
// Free a SuperSlab
|
||||
void superslab_free(SuperSlab* ss);
|
||||
|
||||
// Phase 2a: Dynamic Expansion Functions
|
||||
// Initialize SuperSlabHead for a class (called once per class)
|
||||
SuperSlabHead* init_superslab_head(int class_idx);
|
||||
|
||||
// Expand SuperSlabHead by allocating and linking a new chunk
|
||||
// Returns 0 on success, -1 on OOM
|
||||
int expand_superslab_head(SuperSlabHead* head);
|
||||
|
||||
// Find which chunk a pointer belongs to
|
||||
// Returns the chunk containing ptr, or NULL if not found
|
||||
SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx);
|
||||
/*
|
||||
* Phase 12:
|
||||
* - Per-class SuperSlabHead / superslab_allocate() are superseded by
|
||||
* the shared SuperSlab pool (hakmem_shared_pool.{h,c}).
|
||||
* - The legacy declarations are removed to avoid accidental use.
|
||||
*/
|
||||
|
||||
// Initialize a slab within SuperSlab
|
||||
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid);
|
||||
|
||||
@ -200,12 +200,10 @@ static inline void tls_list_spill_excess(int class_idx, TinyTLSList* tls) {
|
||||
handled = 1;
|
||||
} else {
|
||||
void* prev = meta->freelist;
|
||||
// BUG FIX: Use Box API to write next pointer at correct offset
|
||||
tiny_next_write(class_idx, node, prev); // freelist within slab uses base link
|
||||
tiny_next_write(class_idx, node, prev);
|
||||
meta->freelist = node;
|
||||
tiny_failfast_log("tls_spill_ss", ss->size_class, ss, meta, node, prev);
|
||||
tiny_failfast_log("tls_spill_ss", meta->class_idx, ss, meta, node, prev);
|
||||
if (meta->used > 0) meta->used--;
|
||||
// Active was decremented at free time
|
||||
handled = 1;
|
||||
}
|
||||
#if HAKMEM_BUILD_DEBUG
|
||||
|
||||
@ -20,9 +20,9 @@ typedef struct SlabHandle {
|
||||
SuperSlab* ss; // SuperSlab pointer
|
||||
TinySlabMeta* meta; // Cached metadata pointer
|
||||
uint8_t slab_idx; // Slab index within SuperSlab
|
||||
uint32_t owner_tid; // Owner thread ID (cached)
|
||||
uint8_t owner_tid_low; // Owner thread ID (low 8 bits, cached)
|
||||
uint8_t valid; // 1=owned, 0=invalid/unowned
|
||||
uint8_t _pad[3]; // Padding
|
||||
uint8_t _pad[2]; // Padding
|
||||
} SlabHandle;
|
||||
|
||||
// Core operations
|
||||
@ -44,7 +44,7 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
|
||||
|
||||
TinySlabMeta* m = &ss->slabs[idx];
|
||||
|
||||
// Try to acquire ownership (Box 3: Ownership)
|
||||
// Try to acquire ownership (Box 3: Ownership, Phase 12 uses owner_tid_low)
|
||||
if (!ss_owner_try_acquire(m, tid)) {
|
||||
return h; // Failed to acquire
|
||||
}
|
||||
@ -53,14 +53,14 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
|
||||
h.ss = ss;
|
||||
h.meta = m;
|
||||
h.slab_idx = (uint8_t)idx;
|
||||
h.owner_tid = tid;
|
||||
h.owner_tid_low = (uint8_t)tid;
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
uint32_t cur = __atomic_load_n(&m->owner_tid, __ATOMIC_RELAXED);
|
||||
if (cur != tid || cur == 0) {
|
||||
uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
|
||||
if (cur != h.owner_tid_low || cur == 0) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)m->class_idx,
|
||||
m,
|
||||
((uintptr_t)cur << 32) | (uintptr_t)tid);
|
||||
((uintptr_t)cur << 32) | (uintptr_t)h.owner_tid_low);
|
||||
// Log the error but don't raise signal in debug builds by default to avoid hangs
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static _Atomic uint64_t g_invalid_owner_count = 0;
|
||||
@ -76,9 +76,9 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
|
||||
h.valid = 0;
|
||||
return h;
|
||||
}
|
||||
uintptr_t aux = ((uintptr_t)h.slab_idx << 32) | (uintptr_t)tid;
|
||||
uintptr_t aux = ((uintptr_t)h.slab_idx << 32) | (uintptr_t)h.owner_tid_low;
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_OWNER_ACQUIRE,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)m->class_idx,
|
||||
m,
|
||||
aux);
|
||||
}
|
||||
@ -108,11 +108,11 @@ static inline void slab_drain_remote(SlabHandle* h) {
|
||||
}
|
||||
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
uint32_t cur_owner = __atomic_load_n(&h->meta->owner_tid, __ATOMIC_RELAXED);
|
||||
if (cur_owner != h->owner_tid || cur_owner == 0) {
|
||||
uintptr_t aux = ((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid;
|
||||
uint8_t cur_owner = __atomic_load_n(&h->meta->owner_tid_low, __ATOMIC_RELAXED);
|
||||
if (cur_owner != h->owner_tid_low || cur_owner == 0) {
|
||||
uintptr_t aux = ((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid_low;
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
|
||||
(uint16_t)h->ss->size_class,
|
||||
(uint16_t)h->meta->class_idx,
|
||||
h->meta,
|
||||
aux);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
@ -149,7 +149,7 @@ static inline void slab_drain_remote_full(SlabHandle* h) {
|
||||
h->slab_idx,
|
||||
(void*)head,
|
||||
0xA242u,
|
||||
h->owner_tid,
|
||||
h->owner_tid_low,
|
||||
0);
|
||||
}
|
||||
}
|
||||
@ -169,17 +169,17 @@ static inline void slab_release(SlabHandle* h) {
|
||||
}
|
||||
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
uint32_t cur_owner = __atomic_load_n(&h->meta->owner_tid, __ATOMIC_RELAXED);
|
||||
uint8_t cur_owner = __atomic_load_n(&h->meta->owner_tid_low, __ATOMIC_RELAXED);
|
||||
uintptr_t aux = ((uintptr_t)h->slab_idx << 32) | (uintptr_t)cur_owner;
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_OWNER_RELEASE,
|
||||
(uint16_t)(h->ss ? h->ss->size_class : 0u),
|
||||
(uint16_t)(h->meta ? h->meta->class_idx : 0xFFu),
|
||||
h->meta,
|
||||
aux);
|
||||
if (cur_owner != h->owner_tid || cur_owner == 0) {
|
||||
if (cur_owner != h->owner_tid_low || cur_owner == 0) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
|
||||
(uint16_t)(h->ss ? h->ss->size_class : 0u),
|
||||
(uint16_t)(h->meta ? h->meta->class_idx : 0xFFu),
|
||||
h->meta,
|
||||
((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid);
|
||||
((uintptr_t)cur_owner << 32) | (uintptr_t)h->owner_tid_low);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static _Atomic uint64_t g_release_invalid_count = 0;
|
||||
uint64_t count = atomic_fetch_add(&g_release_invalid_count, 1);
|
||||
@ -194,10 +194,10 @@ static inline void slab_release(SlabHandle* h) {
|
||||
}
|
||||
}
|
||||
|
||||
// Release ownership (Box 3: Ownership)
|
||||
__atomic_store_n(&h->meta->owner_tid, 0u, __ATOMIC_RELEASE);
|
||||
// Release ownership (Box 3: Ownership, Phase 12)
|
||||
__atomic_store_n(&h->meta->owner_tid_low, 0u, __ATOMIC_RELEASE);
|
||||
h->valid = 0;
|
||||
h->owner_tid = 0;
|
||||
h->owner_tid_low = 0;
|
||||
}
|
||||
|
||||
// Check if handle is valid (owned and safe to use)
|
||||
@ -243,11 +243,11 @@ static inline int slab_freelist_push(SlabHandle* h, void* ptr) {
|
||||
if ((pval & (sizeof(void*) - 1)) != 0 || (fval && (fval & (sizeof(void*) - 1)) != 0)) {
|
||||
fprintf(stderr,
|
||||
"[SLAB_HANDLE] FREELIST_ALIGN cls=%u slab=%u ptr=%p freelist=%p owner=%u used=%u\n",
|
||||
h->ss ? h->ss->size_class : 0u,
|
||||
h->meta ? h->meta->class_idx : 0u,
|
||||
(unsigned)h->slab_idx,
|
||||
ptr,
|
||||
h->meta->freelist,
|
||||
h->meta->owner_tid,
|
||||
h->meta->owner_tid_low,
|
||||
(unsigned)h->meta->used);
|
||||
}
|
||||
}
|
||||
@ -255,7 +255,7 @@ static inline int slab_freelist_push(SlabHandle* h, void* ptr) {
|
||||
// Ownership guaranteed by valid==1 → safe to modify freelist
|
||||
void* old_freelist = h->meta->freelist; // Store for empty→non-empty detection
|
||||
void* prev = h->meta->freelist;
|
||||
tiny_next_write(h->ss->size_class, ptr, prev); // Box API: next pointer write
|
||||
tiny_next_write(h->meta->class_idx, ptr, prev); // Box API: next pointer write (per-slab class)
|
||||
h->meta->freelist = ptr;
|
||||
// Optional freelist mask update (opt-in via env HAKMEM_TINY_FREELIST_MASK)
|
||||
do {
|
||||
@ -276,8 +276,8 @@ static inline int slab_freelist_push(SlabHandle* h, void* ptr) {
|
||||
uint32_t bit = (1u << h->slab_idx);
|
||||
atomic_fetch_or_explicit(&h->ss->nonempty_mask, bit, memory_order_release);
|
||||
}
|
||||
tiny_remote_watch_note("freelist_push", h->ss, h->slab_idx, ptr, 0xA236u, h->owner_tid, 0);
|
||||
tiny_remote_track_on_local_free(h->ss, h->slab_idx, ptr, "freelist_push", h->owner_tid);
|
||||
tiny_remote_watch_note("freelist_push", h->ss, h->slab_idx, ptr, 0xA236u, h->owner_tid_low, 0);
|
||||
tiny_remote_track_on_local_free(h->ss, h->slab_idx, ptr, "freelist_push", h->owner_tid_low);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -296,7 +296,7 @@ static inline void* slab_freelist_pop(SlabHandle* h) {
|
||||
if (__builtin_expect((uintptr_t)ptr == TINY_REMOTE_SENTINEL, 0)) {
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
fprintf(stderr, "[FREELIST_POP] sentinel detected in freelist (cls=%u slab=%u) -> break chain\n",
|
||||
h->ss ? h->ss->size_class : 0u,
|
||||
h->meta ? h->meta->class_idx : 0u,
|
||||
(unsigned)h->slab_idx);
|
||||
}
|
||||
h->meta->freelist = NULL; // break the chain to avoid propagating corruption
|
||||
@ -304,7 +304,7 @@ static inline void* slab_freelist_pop(SlabHandle* h) {
|
||||
return NULL;
|
||||
}
|
||||
if (ptr) {
|
||||
void* next = tiny_next_read(h->ss->size_class, ptr); // Box API: next pointer read
|
||||
void* next = tiny_next_read(h->meta->class_idx, ptr); // Box API: next pointer read
|
||||
h->meta->freelist = next;
|
||||
h->meta->used++;
|
||||
// Optional freelist mask clear when freelist becomes empty
|
||||
@ -321,9 +321,9 @@ static inline void* slab_freelist_pop(SlabHandle* h) {
|
||||
} while (0);
|
||||
// Keep nonempty_mask sticky to ensure subsequent frees remain discoverable.
|
||||
// Do NOT clear nonempty_mask on transient empty; adopt gate will verify safety.
|
||||
tiny_remote_watch_note("freelist_pop", h->ss, h->slab_idx, ptr, 0xA237u, h->owner_tid, 0);
|
||||
tiny_remote_assert_not_remote(h->ss, h->slab_idx, ptr, "freelist_pop_ret", h->owner_tid);
|
||||
tiny_remote_track_on_alloc(h->ss, h->slab_idx, ptr, "freelist_pop", h->owner_tid);
|
||||
tiny_remote_watch_note("freelist_pop", h->ss, h->slab_idx, ptr, 0xA237u, h->owner_tid_low, 0);
|
||||
tiny_remote_assert_not_remote(h->ss, h->slab_idx, ptr, "freelist_pop_ret", h->owner_tid_low);
|
||||
tiny_remote_track_on_alloc(h->ss, h->slab_idx, ptr, "freelist_pop", h->owner_tid_low);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@ -106,7 +106,9 @@ static inline void tiny_failfast_abort_ptr(const char* stage,
|
||||
fprintf(stderr,
|
||||
"[TRC_FAILFAST_PTR] stage=%s cls=%d slab_idx=%d ptr=%p reason=%s base=%p limit=%p cap=%zu used=%u offset=%zu\n",
|
||||
stage ? stage : "(null)",
|
||||
ss ? (int)ss->size_class : -1,
|
||||
(ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss))
|
||||
? (int)ss->slabs[slab_idx].class_idx
|
||||
: -1,
|
||||
slab_idx,
|
||||
ptr,
|
||||
reason ? reason : "(null)",
|
||||
@ -230,7 +232,7 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
if (!in_range) code |= 0x01u;
|
||||
if (!aligned) code |= 0x02u;
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)(ss ? ss->slabs[slab_idx].class_idx : 0xFFu),
|
||||
ptr,
|
||||
((uintptr_t)slab_idx << 32) | code);
|
||||
return 0;
|
||||
@ -246,7 +248,7 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
if (__builtin_expect(g_disable_remote_glob, 0)) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
void* prev = meta->freelist;
|
||||
tiny_next_write(ss->size_class, ptr, prev); // Box API: next pointer write
|
||||
tiny_next_write(ss->slabs[slab_idx].class_idx, ptr, prev); // Phase 12: per-slab class
|
||||
meta->freelist = ptr;
|
||||
// Reflect accounting (callers also decrement used; keep idempotent here)
|
||||
ss_active_dec_one(ss);
|
||||
@ -265,7 +267,7 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
do {
|
||||
old = atomic_load_explicit(head, memory_order_acquire);
|
||||
if (!g_remote_side_enable) {
|
||||
tiny_next_write(ss->size_class, ptr, (void*)old); // Box API: legacy embedding via next pointer
|
||||
tiny_next_write(ss->slabs[slab_idx].class_idx, ptr, (void*)old); // Phase 12: per-slab class
|
||||
}
|
||||
} while (!atomic_compare_exchange_weak_explicit(head, &old, (uintptr_t)ptr,
|
||||
memory_order_release, memory_order_relaxed));
|
||||
@ -282,23 +284,33 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
int old_al = (old == 0) || ((old & (sizeof(void*) - 1)) == 0);
|
||||
if (!ptr_in || !ptr_al || !old_in || !old_al) {
|
||||
uintptr_t flags = ((uintptr_t)ptr_al << 3) | ((uintptr_t)ptr_in << 2) | ((uintptr_t)old_al << 1) | (uintptr_t)old_in;
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
|
||||
(uint16_t)ss->size_class,
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID,
|
||||
(uint16_t)(ss ? ss->slabs[slab_idx].class_idx : 0xFFu),
|
||||
ptr,
|
||||
0xB100u | (flags & 0xFu));
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
|
||||
}
|
||||
fprintf(stderr, "[REMOTE_PUSH] cls=%u slab=%d ptr=%p old=%p transitioned=%d\n",
|
||||
ss->size_class, slab_idx, ptr, (void*)old, old == 0);
|
||||
(ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss))
|
||||
? ss->slabs[slab_idx].class_idx
|
||||
: 0xFFu,
|
||||
slab_idx,
|
||||
ptr,
|
||||
(void*)old,
|
||||
old == 0);
|
||||
// Pack: [slab_idx<<32 | bit0:old==0 | bit1:old_al | bit2:ptr_al]
|
||||
uintptr_t aux = ((uintptr_t)slab_idx << 32) | ((old == 0) ? 1u : 0u) | ((old_al ? 1u : 0u) << 1) | ((ptr_al ? 1u : 0u) << 2);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)((ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss))
|
||||
? ss->slabs[slab_idx].class_idx
|
||||
: 0xFFu),
|
||||
ptr,
|
||||
aux);
|
||||
} else {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_PUSH,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)((ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss))
|
||||
? ss->slabs[slab_idx].class_idx
|
||||
: 0xFFu),
|
||||
ptr,
|
||||
((uintptr_t)slab_idx << 32) | (uint32_t)(old == 0));
|
||||
}
|
||||
@ -311,7 +323,8 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
// First remote observed for this slab: mark slab_listed and notify publisher paths
|
||||
unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
|
||||
(void)prev; // best-effort
|
||||
tiny_publish_notify((int)ss->size_class, ss, slab_idx);
|
||||
// Phase 12: Use per-slab class_idx instead of ss->size_class
|
||||
tiny_publish_notify((int)ss->slabs[slab_idx].class_idx, ss, slab_idx);
|
||||
} else {
|
||||
// Optional: best-effort notify if already non-empty but not listed
|
||||
if (__builtin_expect(g_remote_force_notify, 0)) {
|
||||
@ -319,7 +332,8 @@ static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* ptr) {
|
||||
if (listed == 0) {
|
||||
unsigned prev = atomic_exchange_explicit(&ss->slab_listed[slab_idx], 1u, memory_order_acq_rel);
|
||||
(void)prev;
|
||||
tiny_publish_notify((int)ss->size_class, ss, slab_idx);
|
||||
// Phase 12: Use per-slab class_idx instead of ss->size_class
|
||||
tiny_publish_notify((int)ss->slabs[slab_idx].class_idx, ss, slab_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -338,7 +352,8 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
}
|
||||
if (en) {
|
||||
int exp = 0; if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
|
||||
fprintf(stderr, "[DRAIN_OPT] chain splice active (cls=%u slab=%d)\n", ss ? ss->size_class : 0u, slab_idx);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
fprintf(stderr, "[DRAIN_OPT] chain splice active (cls=%u slab=%d)\n", meta ? meta->class_idx : 0u, slab_idx);
|
||||
}
|
||||
}
|
||||
} while (0);
|
||||
@ -348,8 +363,9 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
// Option A: Fail-fast guard against sentinel leaking into freelist
|
||||
if (__builtin_expect(p == TINY_REMOTE_SENTINEL, 0)) {
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
// Phase 12: Use per-slab class_idx
|
||||
fprintf(stderr, "[REMOTE_DRAIN] head is sentinel! cls=%u slab=%d head=%p\n",
|
||||
ss ? ss->size_class : 0u,
|
||||
meta ? meta->class_idx : 0u,
|
||||
slab_idx,
|
||||
(void*)p);
|
||||
}
|
||||
@ -370,13 +386,15 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
if (p < base || p >= base + ss_size) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA210u, base, ss_size, p);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)meta->class_idx, (void*)p, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
break;
|
||||
}
|
||||
if ((p & (uintptr_t)(sizeof(void*) - 1)) != 0) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA211u, base, ss_size, p);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)p, aux);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)meta->class_idx, (void*)p, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
break;
|
||||
}
|
||||
@ -385,8 +403,9 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
// Additional defensive check (should be redundant with head guard)
|
||||
if (__builtin_expect((uintptr_t)node == TINY_REMOTE_SENTINEL, 0)) {
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
// Phase 12: Use per-slab class_idx
|
||||
fprintf(stderr, "[REMOTE_DRAIN] node sentinel detected, abort chain (cls=%u slab=%d)\n",
|
||||
ss ? ss->size_class : 0u, slab_idx);
|
||||
meta ? meta->class_idx : 0u, slab_idx);
|
||||
}
|
||||
if (__builtin_expect(g_tiny_safe_free_strict, 0)) { raise(SIGUSR2); }
|
||||
break;
|
||||
@ -396,19 +415,20 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
if (__builtin_expect(g_remote_side_enable, 0)) {
|
||||
if (!tiny_remote_sentinel_ok(node)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, (uintptr_t)node);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, node, aux);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)meta->class_idx, node, aux);
|
||||
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
|
||||
tiny_remote_report_corruption("drain", node, observed);
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
// Phase 12: Use local meta parameter (no shadowing)
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
fprintf(stderr,
|
||||
"[REMOTE_SENTINEL-DRAIN] cls=%u slab=%d node=%p drained=%u observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p\n",
|
||||
ss->size_class,
|
||||
meta->class_idx,
|
||||
slab_idx,
|
||||
node,
|
||||
drained,
|
||||
observed,
|
||||
meta->owner_tid,
|
||||
(unsigned)meta->owner_tid_low, // Phase 12: Use owner_tid_low
|
||||
(unsigned)meta->used,
|
||||
meta->freelist);
|
||||
}
|
||||
@ -423,15 +443,18 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
if (__builtin_expect(g_debug_remote_guard && drained < 3, 0)) {
|
||||
// First few nodes: record low info for triage
|
||||
uintptr_t aux = ((uintptr_t)slab_idx << 32) | (uintptr_t)(drained & 0xFFFF);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)ss->size_class, node, aux);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN, (uint16_t)meta->class_idx, node, aux);
|
||||
}
|
||||
// Link into local chain (avoid touching meta->freelist per node)
|
||||
if (chain_head == NULL) {
|
||||
chain_head = node;
|
||||
chain_tail = node;
|
||||
tiny_next_write(ss->size_class, node, NULL); // Box API: terminate chain
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_next_write(meta->class_idx, node, NULL); // Box API: terminate chain
|
||||
} else {
|
||||
tiny_next_write(ss->size_class, node, chain_head); // Box API: link to existing chain
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_next_write(meta->class_idx, node, chain_head); // Box API: link to existing chain
|
||||
chain_head = node;
|
||||
}
|
||||
p = next;
|
||||
@ -440,11 +463,13 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
// Splice the drained chain into freelist (single meta write)
|
||||
if (chain_head != NULL) {
|
||||
if (chain_tail != NULL) {
|
||||
tiny_next_write(ss->size_class, chain_tail, meta->freelist); // Box API: splice chains
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_next_write(meta->class_idx, chain_tail, meta->freelist); // Box API: splice chains
|
||||
}
|
||||
void* prev = meta->freelist;
|
||||
meta->freelist = chain_head;
|
||||
tiny_failfast_log("remote_drain", ss->size_class, ss, meta, chain_head, prev);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_failfast_log("remote_drain", meta->class_idx, ss, meta, chain_head, prev);
|
||||
// Optional: set freelist bit when transitioning from empty
|
||||
do {
|
||||
static int g_mask_en = -1;
|
||||
@ -460,8 +485,9 @@ static inline void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_i
|
||||
}
|
||||
// Reset remote count after full drain
|
||||
atomic_store_explicit(&ss->remote_counts[slab_idx], 0u, memory_order_relaxed);
|
||||
// Phase 12: Use per-slab class_idx
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_DRAIN,
|
||||
(uint16_t)ss->size_class,
|
||||
(uint16_t)meta->class_idx,
|
||||
ss,
|
||||
((uintptr_t)slab_idx << 32) | drained);
|
||||
}
|
||||
@ -475,15 +501,17 @@ static inline void ss_remote_drain_to_freelist(SuperSlab* ss, int slab_idx) {
|
||||
|
||||
// Try to acquire exclusive ownership of slab (REQUIRED before draining remote queue!)
|
||||
// Returns 1 on success (now own slab), 0 on failure (another thread owns it)
|
||||
// CRITICAL: Only succeeds if slab is unowned (owner_tid==0) or already owned by us.
|
||||
// CRITICAL: Only succeeds if slab is unowned (owner_tid_low==0) or already owned by us.
|
||||
// Phase 12: Use 8-bit owner_tid_low instead of 16-bit owner_tid
|
||||
static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t self_tid) {
|
||||
uint32_t cur = __atomic_load_n(&m->owner_tid, __ATOMIC_RELAXED);
|
||||
if (cur == self_tid) return 1; // Already owner - success
|
||||
uint8_t self_tid_low = (uint8_t)self_tid; // Phase 12: Truncate to 8-bit
|
||||
uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
|
||||
if (cur == self_tid_low) return 1; // Already owner - success
|
||||
if (cur != 0) return 0; // Another thread owns it - FAIL immediately
|
||||
|
||||
// Slab is unowned (cur==0) - try to claim it
|
||||
uint32_t expected = 0;
|
||||
return __atomic_compare_exchange_n(&m->owner_tid, &expected, self_tid, false,
|
||||
uint8_t expected = 0;
|
||||
return __atomic_compare_exchange_n(&m->owner_tid_low, &expected, self_tid_low, false,
|
||||
__ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
|
||||
@ -49,7 +49,8 @@ typedef struct TinySlabMeta {
|
||||
uint16_t used; // Blocks currently used
|
||||
uint16_t capacity; // Total blocks in slab
|
||||
uint16_t carved; // Blocks carved from linear region (monotonic, never decrements)
|
||||
uint16_t owner_tid; // Owner thread ID (for same-thread fast path, 16-bit to fit carved)
|
||||
uint8_t class_idx; // Phase 12: dynamic class (0-7 active, 255=UNASSIGNED)
|
||||
uint8_t owner_tid_low; // Phase 12: low 8 bits of owner thread ID
|
||||
// Phase 6.24: freelist == NULL → linear allocation mode (lazy init)
|
||||
// Linear mode: allocate sequentially without building freelist
|
||||
// Freelist mode: use freelist after first free() call
|
||||
@ -60,10 +61,9 @@ typedef struct TinySlabMeta {
|
||||
typedef struct SuperSlab {
|
||||
// Header fields (64B total)
|
||||
uint64_t magic; // Magic number (0xHAKMEM_SUPERSLAB)
|
||||
uint8_t size_class; // Size class (0-7 for 8-64B)
|
||||
uint8_t active_slabs; // Number of active slabs (0-32 for 2MB, 0-16 for 1MB)
|
||||
uint8_t lg_size; // Phase 8.3: ACE - SuperSlab size (20=1MB, 21=2MB)
|
||||
uint8_t _pad0; // Padding
|
||||
uint8_t _pad0; // Padding (Phase 12: reserved, was size_class)
|
||||
uint32_t slab_bitmap; // 32-bit bitmap (1=active, 0=free)
|
||||
_Atomic uint32_t freelist_mask; // Bit i=1 when slab i freelist is non-empty (opt-in)
|
||||
|
||||
@ -106,6 +106,19 @@ typedef struct SuperSlab {
|
||||
|
||||
} __attribute__((aligned(64))) SuperSlab;
|
||||
|
||||
// Phase 12 compatibility helpers
|
||||
// Prefer per-slab class_idx; superslab_get_class() is a temporary shim.
|
||||
static inline uint8_t tiny_slab_class_idx(const SuperSlab* ss, int slab_idx) {
|
||||
if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) {
|
||||
return 255; // UNASSIGNED / invalid
|
||||
}
|
||||
return ss->slabs[slab_idx].class_idx;
|
||||
}
|
||||
|
||||
static inline uint8_t superslab_get_class(const SuperSlab* ss, int slab_idx) {
|
||||
return tiny_slab_class_idx(ss, slab_idx);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 2a: Dynamic Expansion - SuperSlabHead for chunk management
|
||||
// ============================================================================
|
||||
|
||||
@ -42,10 +42,10 @@ static inline void tiny_alloc_dump_tls_state(int class_idx, const char* tag, Tin
|
||||
uint32_t mask = 1u << i;
|
||||
TinySlabMeta* meta = &ss->slabs[i];
|
||||
fprintf(stderr,
|
||||
" slab%02d active=%d used=%u cap=%u freelist=%p owner=%u\n",
|
||||
" slab%02d active=%d used=%u cap=%u freelist=%p owner=%u class=%u\n",
|
||||
i, (ss->slab_bitmap & mask) ? 1 : 0,
|
||||
(unsigned)meta->used, (unsigned)meta->capacity,
|
||||
meta->freelist, meta->owner_tid);
|
||||
meta->freelist, meta->owner_tid_low, meta->class_idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -59,12 +59,9 @@ extern void tiny_alloc_fast_push(int class_idx, void* ptr);
|
||||
// Invariant: This check MUST be atomic (no TOCTOU between check and push)
|
||||
static inline int tiny_free_is_same_thread_ss(SuperSlab* ss, int slab_idx, uint32_t my_tid) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
|
||||
// Box 3 (Ownership): Load owner_tid atomically
|
||||
uint32_t owner = tiny_atomic_load_u32_relaxed(&meta->owner_tid);
|
||||
|
||||
// Same thread check
|
||||
return (owner == my_tid);
|
||||
uint8_t my_tid_low = (uint8_t)my_tid;
|
||||
uint8_t owner = tiny_atomic_load_u8_relaxed(&meta->owner_tid_low);
|
||||
return (owner == my_tid_low && owner != 0);
|
||||
}
|
||||
|
||||
// Check if ptr belongs to current thread (Legacy TinySlab path)
|
||||
@ -112,8 +109,11 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* base, uin
|
||||
free_ss_debug_count++;
|
||||
int is_same = tiny_free_is_same_thread_ss(ss, slab_idx, my_tid);
|
||||
extern int g_sfc_enabled;
|
||||
fprintf(stderr, "[FREE_SS] base=%p, cls=%d, same_thread=%d, sfc_enabled=%d\n",
|
||||
base, ss->size_class, is_same, g_sfc_enabled);
|
||||
fprintf(stderr, "[FREE_SS] base=%p, cls=%u, same_thread=%d, sfc_enabled=%d\n",
|
||||
base,
|
||||
meta->class_idx,
|
||||
is_same,
|
||||
g_sfc_enabled);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -121,13 +121,13 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* base, uin
|
||||
if (__builtin_expect(!tiny_free_is_same_thread_ss(ss, slab_idx, my_tid), 0)) {
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
// Track cross-thread frees (compile-time gated)
|
||||
g_free_via_ss_remote[ss->size_class]++;
|
||||
g_free_via_ss_remote[meta->class_idx]++;
|
||||
#endif
|
||||
return 0; // Cross-thread → caller should delegate to remote path
|
||||
}
|
||||
|
||||
// Fast path: Same-thread free (2-3 instructions)
|
||||
int class_idx = ss->size_class;
|
||||
int class_idx = meta->class_idx;
|
||||
// Phase E1-CORRECT: base pointer already converted by caller (no double conversion!)
|
||||
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
|
||||
@ -131,7 +131,8 @@
|
||||
continue; // Skip invalid index
|
||||
}
|
||||
TinySlabMeta* meta = &owner_ss->slabs[slab_idx];
|
||||
tiny_next_write(owner_ss->size_class, it.ptr, meta->freelist);
|
||||
// Use per-slab class for freelist linkage (Phase 12)
|
||||
tiny_next_write(meta->class_idx, it.ptr, meta->freelist);
|
||||
meta->freelist = it.ptr;
|
||||
meta->used--;
|
||||
// Decrement SuperSlab active counter (spill returns blocks to SS)
|
||||
@ -323,7 +324,8 @@
|
||||
continue; // Skip invalid index
|
||||
}
|
||||
TinySlabMeta* meta = &ss_owner->slabs[slab_idx];
|
||||
tiny_next_write(ss_owner->size_class, it.ptr, meta->freelist);
|
||||
// Use per-slab class for freelist linkage (Phase 12)
|
||||
tiny_next_write(meta->class_idx, it.ptr, meta->freelist);
|
||||
meta->freelist = it.ptr;
|
||||
meta->used--;
|
||||
// 空SuperSlab処理はフラッシュ/バックグラウンドで対応(ホットパス除外)
|
||||
|
||||
@ -70,7 +70,7 @@ static void tiny_remote_track_log_mismatch(const char* stage,
|
||||
uint32_t tid,
|
||||
const char* prev_stage) {
|
||||
if (!__builtin_expect(g_debug_remote_guard, 0)) return;
|
||||
uint16_t cls = ss ? (uint16_t)ss->size_class : 0;
|
||||
uint16_t cls = 0;
|
||||
uintptr_t base = ss ? (uintptr_t)ss : 0;
|
||||
size_t ss_size = ss ? ((size_t)1ULL << ss->lg_size) : 0;
|
||||
fprintf(stderr,
|
||||
@ -278,7 +278,7 @@ int tiny_remote_guard_allow_local_push(SuperSlab* ss,
|
||||
if (__builtin_expect(g_disable_remote_guard, 0)) return 1;
|
||||
} while (0);
|
||||
if (!__builtin_expect(g_debug_remote_guard, 0)) return 1;
|
||||
uint32_t owner = __atomic_load_n(&meta->owner_tid, __ATOMIC_RELAXED);
|
||||
uint32_t owner = (uint32_t)meta->owner_tid_low;
|
||||
if (owner == self_tid && owner != 0) {
|
||||
return 1;
|
||||
}
|
||||
@ -338,7 +338,7 @@ static void tiny_remote_watch_emit(const char* stage,
|
||||
size_t sz = (size_t)1ULL << ss->lg_size;
|
||||
uint32_t combined = (code & 0xFFFFu) | ((stage_hash & 0xFFFFu) << 16);
|
||||
aux = tiny_remote_pack_diag(combined, base, sz, (uintptr_t)node);
|
||||
cls = (uint16_t)ss->size_class;
|
||||
cls = 0;
|
||||
} else {
|
||||
aux = ((uintptr_t)(code & 0xFFFFu) << 32) | (uintptr_t)(stage_hash & 0xFFFFu);
|
||||
}
|
||||
@ -350,13 +350,12 @@ static void tiny_remote_watch_emit(const char* stage,
|
||||
if (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
fprintf(stderr,
|
||||
"[REMOTE_WATCH] stage=%s code=0x%04x cls=%u slab=%d node=%p owner=%u used=%u freelist=%p tid=0x%08x first_tid=0x%08x\n",
|
||||
"[REMOTE_WATCH] stage=%s code=0x%04x slab=%d node=%p owner_tid_low=%u used=%u freelist=%p tid=0x%08x first_tid=0x%08x\n",
|
||||
stage ? stage : "(null)",
|
||||
(unsigned)code,
|
||||
ss->size_class,
|
||||
slab_idx,
|
||||
node,
|
||||
meta->owner_tid,
|
||||
(unsigned)meta->owner_tid_low,
|
||||
(unsigned)meta->used,
|
||||
meta->freelist,
|
||||
tid,
|
||||
@ -433,8 +432,7 @@ static void tiny_remote_dump_queue_sample(SuperSlab* ss, int slab_idx) {
|
||||
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
|
||||
unsigned rc = atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed);
|
||||
fprintf(stderr,
|
||||
"[REMOTE_QUEUE] cls=%u slab=%d head=%p rc=%u\n",
|
||||
ss->size_class,
|
||||
"[REMOTE_QUEUE] slab=%d head=%p rc=%u\n",
|
||||
slab_idx,
|
||||
(void*)head,
|
||||
rc);
|
||||
@ -554,16 +552,15 @@ void tiny_remote_side_set(struct SuperSlab* ss, int slab_idx, void* node, uintpt
|
||||
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)node, memory_order_relaxed);
|
||||
tiny_remote_report_corruption("dup_push", node, observed);
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA212u, base, ss_size, (uintptr_t)node);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, node, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0, node, aux);
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
fprintf(stderr,
|
||||
"[REMOTE_DUP_PUSH] cls=%u slab=%d node=%p next=%p observed=0x%016" PRIxPTR " owner=%u rc=%u head=%p\n",
|
||||
ss->size_class,
|
||||
"[REMOTE_DUP_PUSH] slab=%d node=%p next=%p observed=0x%016" PRIxPTR " owner_tid_low=%u rc=%u head=%p\n",
|
||||
slab_idx,
|
||||
node,
|
||||
(void*)next,
|
||||
observed,
|
||||
meta->owner_tid,
|
||||
(unsigned)meta->owner_tid_low,
|
||||
(unsigned)atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
|
||||
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
|
||||
tiny_remote_watch_note("dup_push", ss, slab_idx, node, 0xA234u, 0, 1);
|
||||
|
||||
@ -1,46 +1,40 @@
|
||||
// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer (Box 4)
|
||||
// Purpose: Slab allocation, refill, and adoption logic
|
||||
// Extracted from: hakmem_tiny_free.inc lines 626-1170
|
||||
// Box Theory: Box 4 (Refill/Adoption) integration
|
||||
//
|
||||
// Purpose: Slab allocation, refill, and adoption logic (Phase 12 shared pool)
|
||||
// Public functions:
|
||||
// - superslab_alloc_from_slab(): Allocate from specific slab (linear or freelist)
|
||||
// - superslab_refill(): Refill TLS slab (adoption, registry scan, fresh alloc)
|
||||
// - superslab_refill(): Refill TLS slab via shared pool
|
||||
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point
|
||||
|
||||
#include "box/superslab_expansion_box.h" // Box E: Expansion with TLS state guarantee
|
||||
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
|
||||
|
||||
// ============================================================================
|
||||
// Phase 6.23: SuperSlab Allocation Helpers
|
||||
// ============================================================================
|
||||
|
||||
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
|
||||
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
|
||||
#include "hakmem_tiny_superslab_constants.h"
|
||||
#include "tiny_box_geometry.h" // Box 3: Geometry & Capacity Calculator
|
||||
#include "tiny_box_geometry.h" // Box 3: Geometry & Capacity Calculator"
|
||||
|
||||
// ============================================================================
|
||||
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
|
||||
// ============================================================================
|
||||
|
||||
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
|
||||
// Phase 1 (Small): For hottest tiny classes (C0–C3), prefer strict bump-only
|
||||
// when there is no pending remote and the freelist is empty. This avoids
|
||||
// pointer-chasing and header writes entirely on the common path.
|
||||
// Small hot classes (C0–C3): bump-only fast path if no remote/freelist
|
||||
do {
|
||||
if (__builtin_expect(ss->size_class <= 3, 1)) {
|
||||
// Skip if remote queue has pending nodes
|
||||
uint8_t cls = meta->class_idx;
|
||||
if (__builtin_expect(cls <= 3, 1)) {
|
||||
if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0)
|
||||
break;
|
||||
if (meta->freelist == NULL && meta->used < meta->capacity) {
|
||||
size_t unit_sz = tiny_stride_for_class(ss->size_class);
|
||||
size_t unit_sz = tiny_stride_for_class(cls);
|
||||
uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
|
||||
void* block = tiny_block_at_index(base, meta->used, unit_sz);
|
||||
meta->used++;
|
||||
ss_active_inc(ss);
|
||||
HAK_RET_ALLOC(ss->size_class, block);
|
||||
HAK_RET_ALLOC(cls, block);
|
||||
}
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// Ensure remote queue is drained before handing blocks back to TLS (UNLIKELY in 1T)
|
||||
// Drain remote queue if needed before handing blocks back to TLS
|
||||
if (__builtin_expect(atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0, 0)) {
|
||||
uint32_t self_tid = tiny_self_u32();
|
||||
SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
|
||||
@ -90,20 +84,17 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 6.24: Linear allocation mode (freelist == NULL)
|
||||
// This avoids the 4000-8000 cycle cost of building freelist on init
|
||||
// Linear allocation mode
|
||||
if (__builtin_expect(meta->freelist == NULL && meta->used < meta->capacity, 1)) {
|
||||
// Box 3: Get stride and slab base
|
||||
size_t unit_sz = tiny_stride_for_class(ss->size_class);
|
||||
size_t unit_sz = tiny_stride_for_class(meta->class_idx);
|
||||
uint8_t* base = tiny_slab_base_for_geometry(ss, slab_idx);
|
||||
void* block_base = tiny_block_at_index(base, meta->used, unit_sz);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Box 3: Debug safety guard
|
||||
if (__builtin_expect(!tiny_carve_guard(slab_idx, meta->used, unit_sz, 1), 0)) {
|
||||
size_t dbg_usable = tiny_usable_bytes_for_slab(slab_idx);
|
||||
uintptr_t dbg_off = (uintptr_t)((uint8_t*)block_base - base);
|
||||
fprintf(stderr, "[TINY_ALLOC_BOUNDS] cls=%u slab=%d used=%u cap=%u unit=%zu off=%lu usable=%zu\n",
|
||||
ss->size_class, slab_idx, meta->used, meta->capacity, unit_sz,
|
||||
meta->class_idx, slab_idx, meta->used, meta->capacity, unit_sz,
|
||||
(unsigned long)dbg_off, dbg_usable);
|
||||
return NULL;
|
||||
}
|
||||
@ -111,7 +102,7 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
|
||||
meta->used++;
|
||||
void* user =
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
tiny_region_id_write_header(block_base, ss->size_class);
|
||||
tiny_region_id_write_header(block_base, meta->class_idx);
|
||||
#else
|
||||
block_base;
|
||||
#endif
|
||||
@ -119,53 +110,37 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
|
||||
tiny_remote_track_on_alloc(ss, slab_idx, user, "linear_alloc", 0);
|
||||
tiny_remote_assert_not_remote(ss, slab_idx, user, "linear_alloc_ret", 0);
|
||||
}
|
||||
return user; // Fast path: O(1) pointer arithmetic
|
||||
return user;
|
||||
}
|
||||
|
||||
// Freelist mode (after first free())
|
||||
// Freelist mode
|
||||
if (__builtin_expect(meta->freelist != NULL, 0)) {
|
||||
void* block = meta->freelist;
|
||||
|
||||
// CORRUPTION DEBUG: Validate freelist head before popping
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[ss->size_class];
|
||||
size_t blk = g_tiny_class_sizes[meta->class_idx];
|
||||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||||
uintptr_t block_addr = (uintptr_t)block;
|
||||
uintptr_t slab_addr = (uintptr_t)slab_base;
|
||||
uintptr_t offset = block_addr - slab_addr;
|
||||
|
||||
fprintf(stderr, "[ALLOC_POP] cls=%u slab=%d block=%p offset=%zu (used=%u cap=%u)\n",
|
||||
ss->size_class, slab_idx, block, offset, meta->used, meta->capacity);
|
||||
meta->class_idx, slab_idx, block, offset, meta->used, meta->capacity);
|
||||
|
||||
if (offset % blk != 0) {
|
||||
fprintf(stderr, "[ALLOC_CORRUPT] Freelist head is misaligned! block=%p offset=%zu blk=%zu\n",
|
||||
block, offset, blk);
|
||||
fprintf(stderr, "[ALLOC_CORRUPT] Expected alignment: %zu, actual: %zu\n",
|
||||
blk, offset % blk);
|
||||
tiny_failfast_abort_ptr("alloc_pop_misalign", ss, slab_idx, block, "freelist_head_corrupt");
|
||||
}
|
||||
|
||||
size_t index = offset / blk;
|
||||
if (index >= meta->capacity) {
|
||||
fprintf(stderr, "[ALLOC_CORRUPT] Freelist head out of bounds! block=%p index=%zu cap=%u\n",
|
||||
block, index, meta->capacity);
|
||||
tiny_failfast_abort_ptr("alloc_pop_oob", ss, slab_idx, block, "freelist_head_oob");
|
||||
if (offset % blk != 0 ||
|
||||
offset / blk >= meta->capacity) {
|
||||
fprintf(stderr, "[ALLOC_CORRUPT] Freelist head invalid\n");
|
||||
tiny_failfast_abort_ptr("alloc_pop_invalid", ss, slab_idx, block, "freelist_head_corrupt");
|
||||
}
|
||||
}
|
||||
|
||||
meta->freelist = tiny_next_read(ss->size_class, block); // Pop from freelist
|
||||
meta->freelist = tiny_next_read(meta->class_idx, block);
|
||||
meta->used++;
|
||||
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
if (__builtin_expect(meta->used > meta->capacity, 0)) {
|
||||
fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc: used=%u cap=%u cls=%u slab=%d\n",
|
||||
meta->used, meta->capacity, ss->size_class, slab_idx);
|
||||
tiny_failfast_abort_ptr("alloc_used_overflow",
|
||||
ss,
|
||||
slab_idx,
|
||||
block,
|
||||
"freelist_used_over_capacity");
|
||||
}
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0) &&
|
||||
__builtin_expect(meta->used > meta->capacity, 0)) {
|
||||
fprintf(stderr, "[ALLOC_CORRUPT] meta->used overflow on freelist alloc\n");
|
||||
tiny_failfast_abort_ptr("alloc_used_overflow", ss, slab_idx, block, "freelist_used_over_capacity");
|
||||
}
|
||||
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
@ -175,398 +150,56 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
|
||||
return block;
|
||||
}
|
||||
|
||||
return NULL; // Slab is full
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Adopt helper: acquire → drain → bind (single boundary) – returns 1 on success
|
||||
static inline int adopt_bind_if_safe(TinyTLSSlab* tls, SuperSlab* ss, int slab_idx, int class_idx) {
|
||||
uint32_t self_tid = tiny_self_u32();
|
||||
SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
|
||||
if (!slab_is_valid(&h)) return 0;
|
||||
slab_drain_remote_full(&h);
|
||||
if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
|
||||
// Optional: move a few nodes to Front SLL to boost next hits
|
||||
tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
|
||||
tiny_tls_bind_slab(tls, h.ss, h.slab_idx);
|
||||
// Ownership now associated with TLS slab; release handle bookkeeping
|
||||
slab_release(&h);
|
||||
return 1;
|
||||
}
|
||||
slab_release(&h);
|
||||
return 0;
|
||||
}
|
||||
// ============================================================================
|
||||
// Phase 12: Shared SuperSlab Pool based superslab_refill
|
||||
// ============================================================================
|
||||
|
||||
// Phase 6.24 & 7.6: Refill TLS SuperSlab (with unified TLS cache + deferred allocation)
|
||||
SuperSlab* superslab_refill(int class_idx) {
|
||||
#if HAKMEM_DEBUG_COUNTERS
|
||||
g_superslab_refill_calls_dbg[class_idx]++;
|
||||
#endif
|
||||
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
extern int shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out);
|
||||
|
||||
// ============================================================================
|
||||
// Phase 2a: Dynamic Expansion - Initialize SuperSlabHead if needed
|
||||
// ============================================================================
|
||||
extern SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS];
|
||||
extern SuperSlabHead* init_superslab_head(int class_idx);
|
||||
extern int expand_superslab_head(SuperSlabHead* head);
|
||||
|
||||
SuperSlabHead* head = g_superslab_heads[class_idx];
|
||||
if (!head) {
|
||||
// First-time initialization for this class
|
||||
head = init_superslab_head(class_idx);
|
||||
if (!head) {
|
||||
extern __thread int g_hakmem_lock_depth;
|
||||
g_hakmem_lock_depth++;
|
||||
fprintf(stderr, "[DEBUG] superslab_refill: Failed to init SuperSlabHead for class %d\n", class_idx);
|
||||
g_hakmem_lock_depth--;
|
||||
return NULL; // Critical failure
|
||||
}
|
||||
g_superslab_heads[class_idx] = head;
|
||||
SuperSlab* ss = NULL;
|
||||
int slab_idx = -1;
|
||||
if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Try current chunk first (fast path)
|
||||
SuperSlab* current_chunk = head->current_chunk;
|
||||
if (current_chunk) {
|
||||
// Check if current chunk has available slabs
|
||||
// Bitmap semantics: 0=FREE, 1=OCCUPIED
|
||||
// - 0x00000000 = all free (32 available)
|
||||
// - 0xFFFFFFFF = all occupied (0 available)
|
||||
int chunk_cap = ss_slabs_capacity(current_chunk);
|
||||
uint32_t full_mask = (chunk_cap >= 32) ? 0xFFFFFFFF : ((1U << chunk_cap) - 1);
|
||||
|
||||
if (current_chunk->slab_bitmap != full_mask) {
|
||||
// Current chunk has free slabs, use normal refill logic below
|
||||
// (Will be handled by existing code that checks tls->ss)
|
||||
if (tls->ss != current_chunk) {
|
||||
// Update TLS to point to current chunk
|
||||
tls->ss = current_chunk;
|
||||
}
|
||||
} else {
|
||||
// Current chunk exhausted (all slabs occupied), try to expand
|
||||
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
|
||||
extern __thread int g_hakmem_lock_depth;
|
||||
g_hakmem_lock_depth++;
|
||||
fprintf(stderr, "[HAKMEM] SuperSlab chunk exhausted for class %d (bitmap=0x%08x), expanding...\n",
|
||||
class_idx, current_chunk->slab_bitmap);
|
||||
g_hakmem_lock_depth--;
|
||||
#endif
|
||||
|
||||
/* BOX_BOUNDARY: Box 4 → Box E (SuperSlab Expansion) */
|
||||
extern __thread TinyTLSSlab g_tls_slabs[];
|
||||
if (!expansion_safe_expand(head, class_idx, g_tls_slabs)) {
|
||||
// Expansion failed (OOM or capacity limit)
|
||||
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
|
||||
g_hakmem_lock_depth++;
|
||||
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to expand SuperSlabHead for class %d (system OOM)\n", class_idx);
|
||||
g_hakmem_lock_depth--;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
/* BOX_BOUNDARY: Box E → Box 4 (TLS state guaranteed) */
|
||||
|
||||
// TLS state is now correct, reload local pointers
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
current_chunk = tls->ss;
|
||||
|
||||
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
|
||||
g_hakmem_lock_depth++;
|
||||
fprintf(stderr, "[HAKMEM] Successfully expanded SuperSlabHead for class %d\n", class_idx);
|
||||
fprintf(stderr, "[HAKMEM] Box E bound slab 0: meta=%p slab_base=%p capacity=%u\n",
|
||||
(void*)tls->meta, (void*)tls->slab_base, tls->meta ? tls->meta->capacity : 0);
|
||||
g_hakmem_lock_depth--;
|
||||
#endif
|
||||
|
||||
// CRITICAL: Box E already initialized and bound slab 0
|
||||
// Return immediately to avoid double-initialization in refill logic
|
||||
if (tls->meta && tls->slab_base) {
|
||||
// Verify slab 0 is properly initialized
|
||||
if (tls->slab_idx == 0 && tls->meta->capacity > 0) {
|
||||
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
|
||||
g_hakmem_lock_depth++;
|
||||
fprintf(stderr, "[HAKMEM] Returning new chunk with bound slab 0 (capacity=%u)\n", tls->meta->capacity);
|
||||
g_hakmem_lock_depth--;
|
||||
#endif
|
||||
return tls->ss;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify chunk has free slabs (fallback safety check)
|
||||
uint32_t full_mask_check = (ss_slabs_capacity(current_chunk) >= 32) ? 0xFFFFFFFF :
|
||||
((1U << ss_slabs_capacity(current_chunk)) - 1);
|
||||
if (!current_chunk || current_chunk->slab_bitmap == full_mask_check) {
|
||||
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
|
||||
g_hakmem_lock_depth++;
|
||||
fprintf(stderr, "[HAKMEM] CRITICAL: Chunk still has no free slabs for class %d after expansion\n", class_idx);
|
||||
g_hakmem_lock_depth--;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Continue with existing refill logic
|
||||
// ============================================================================
|
||||
static int g_ss_adopt_en = -1; // env: HAKMEM_TINY_SS_ADOPT=1; default auto-on if remote seen
|
||||
if (g_ss_adopt_en == -1) {
|
||||
char* e = getenv("HAKMEM_TINY_SS_ADOPT");
|
||||
if (e) {
|
||||
g_ss_adopt_en = (*e != '0') ? 1 : 0;
|
||||
} else {
|
||||
extern _Atomic int g_ss_remote_seen;
|
||||
g_ss_adopt_en = (atomic_load_explicit(&g_ss_remote_seen, memory_order_relaxed) != 0) ? 1 : 0;
|
||||
}
|
||||
}
|
||||
extern int g_adopt_cool_period;
|
||||
extern __thread int g_tls_adopt_cd[];
|
||||
if (g_adopt_cool_period == -1) {
|
||||
char* cd = getenv("HAKMEM_TINY_SS_ADOPT_COOLDOWN");
|
||||
int v = (cd ? atoi(cd) : 0);
|
||||
if (v < 0) v = 0; if (v > 1024) v = 1024;
|
||||
g_adopt_cool_period = v;
|
||||
}
|
||||
|
||||
static int g_superslab_refill_debug_once = 0;
|
||||
SuperSlab* prev_ss = tls->ss;
|
||||
TinySlabMeta* prev_meta = tls->meta;
|
||||
uint8_t prev_slab_idx = tls->slab_idx;
|
||||
uint8_t prev_active = prev_ss ? prev_ss->active_slabs : 0;
|
||||
uint32_t prev_bitmap = prev_ss ? prev_ss->slab_bitmap : 0;
|
||||
uint32_t prev_meta_used = prev_meta ? prev_meta->used : 0;
|
||||
uint32_t prev_meta_cap = prev_meta ? prev_meta->capacity : 0;
|
||||
int free_idx_attempted = -2; // -2 = not evaluated, -1 = none, >=0 = chosen
|
||||
int reused_slabs = 0;
|
||||
|
||||
// Optional: Mid-size simple refill to avoid multi-layer scans (class>=4)
|
||||
do {
|
||||
static int g_mid_simple_warn = 0;
|
||||
if (class_idx >= 4 && tiny_mid_refill_simple_enabled()) {
|
||||
// If current TLS has a SuperSlab, prefer taking a virgin slab directly
|
||||
if (tls->ss) {
|
||||
int tls_cap = ss_slabs_capacity(tls->ss);
|
||||
if (tls->ss->active_slabs < tls_cap) {
|
||||
int free_idx = superslab_find_free_slab(tls->ss);
|
||||
if (free_idx >= 0) {
|
||||
uint32_t my_tid = tiny_self_u32();
|
||||
superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid);
|
||||
tiny_tls_bind_slab(tls, tls->ss, free_idx);
|
||||
return tls->ss;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Otherwise allocate a fresh SuperSlab and bind first slab
|
||||
SuperSlab* ssn = superslab_allocate((uint8_t)class_idx);
|
||||
if (!ssn) {
|
||||
if (!g_superslab_refill_debug_once && g_mid_simple_warn < 2) {
|
||||
g_mid_simple_warn++;
|
||||
int err = errno;
|
||||
fprintf(stderr, "[DEBUG] mid_simple_refill OOM class=%d errno=%d\n", class_idx, err);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
uint32_t my_tid = tiny_self_u32();
|
||||
superslab_init_slab(ssn, 0, g_tiny_class_sizes[class_idx], my_tid);
|
||||
SuperSlab* old = tls->ss;
|
||||
tiny_tls_bind_slab(tls, ssn, 0);
|
||||
superslab_ref_inc(ssn);
|
||||
if (old && old != ssn) { superslab_ref_dec(old); }
|
||||
return ssn;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
|
||||
// First, try to adopt a published partial SuperSlab for this class
|
||||
if (g_ss_adopt_en) {
|
||||
if (g_adopt_cool_period > 0) {
|
||||
if (g_tls_adopt_cd[class_idx] > 0) {
|
||||
g_tls_adopt_cd[class_idx]--;
|
||||
} else {
|
||||
// eligible to adopt
|
||||
}
|
||||
}
|
||||
if (g_adopt_cool_period == 0 || g_tls_adopt_cd[class_idx] == 0) {
|
||||
SuperSlab* adopt = ss_partial_adopt(class_idx);
|
||||
if (adopt && adopt->magic == SUPERSLAB_MAGIC) {
|
||||
// ========================================================================
|
||||
// Quick Win #2: First-Fit Adopt (vs Best-Fit scoring all 32 slabs)
|
||||
// For Larson, any slab with freelist works - no need to score all 32!
|
||||
// Expected improvement: -3,000 cycles (from 32 atomic loads + 32 scores)
|
||||
// ========================================================================
|
||||
int adopt_cap = ss_slabs_capacity(adopt);
|
||||
int best = -1;
|
||||
for (int s = 0; s < adopt_cap; s++) {
|
||||
TinySlabMeta* m = &adopt->slabs[s];
|
||||
// Quick check: Does this slab have a freelist?
|
||||
if (m->freelist) {
|
||||
// Yes! Try to acquire it immediately (first-fit)
|
||||
best = s;
|
||||
break; // ✅ OPTIMIZATION: Stop at first slab with freelist!
|
||||
}
|
||||
// Optional: Also check remote_heads if we want to prioritize those
|
||||
// (But for Larson, freelist is sufficient)
|
||||
}
|
||||
if (best >= 0) {
|
||||
if (adopt_bind_if_safe(tls, adopt, best, class_idx)) {
|
||||
if (g_adopt_cool_period > 0) g_tls_adopt_cd[class_idx] = g_adopt_cool_period;
|
||||
return adopt;
|
||||
}
|
||||
}
|
||||
// If no freelist found, ignore and continue (optional: republish)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 7.6 Step 4: Check existing SuperSlab with priority order
|
||||
if (tls->ss) {
|
||||
// Priority 1: Reuse slabs with freelist (already freed blocks)
|
||||
int tls_cap = ss_slabs_capacity(tls->ss);
|
||||
uint32_t nonempty_mask = 0;
|
||||
do {
|
||||
static int g_mask_en = -1;
|
||||
if (__builtin_expect(g_mask_en == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
|
||||
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (__builtin_expect(g_mask_en, 0)) {
|
||||
nonempty_mask = atomic_load_explicit(&tls->ss->freelist_mask, memory_order_acquire);
|
||||
break;
|
||||
}
|
||||
for (int i = 0; i < tls_cap; i++) {
|
||||
if (tls->ss->slabs[i].freelist) nonempty_mask |= (1u << i);
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// O(1) lookup: scan mask with ctz (1 instruction!)
|
||||
while (__builtin_expect(nonempty_mask != 0, 1)) {
|
||||
int i = __builtin_ctz(nonempty_mask); // Find first non-empty slab (O(1))
|
||||
nonempty_mask &= ~(1u << i); // Clear bit for next iteration
|
||||
|
||||
// FIX #1 DELETED (Race condition fix):
|
||||
// Previous drain without ownership caused concurrent freelist corruption.
|
||||
// Ownership protocol: MUST bind+owner_cas BEFORE drain (see Fix #3 in tiny_refill.h).
|
||||
// Remote frees will be drained when the slab is adopted (see tiny_refill.h paths).
|
||||
|
||||
if (adopt_bind_if_safe(tls, tls->ss, i, class_idx)) {
|
||||
reused_slabs = 1;
|
||||
return tls->ss;
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 2: Use unused slabs (virgin slabs)
|
||||
if (tls->ss->active_slabs < tls_cap) {
|
||||
// Find next free slab
|
||||
int free_idx = superslab_find_free_slab(tls->ss);
|
||||
free_idx_attempted = free_idx;
|
||||
if (free_idx >= 0) {
|
||||
// Initialize this slab
|
||||
uint32_t my_tid = tiny_self_u32();
|
||||
superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid);
|
||||
|
||||
// Update TLS cache (unified update)
|
||||
tiny_tls_bind_slab(tls, tls->ss, free_idx);
|
||||
|
||||
return tls->ss;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to adopt a partial SuperSlab from registry (one-shot, cheap scan)
|
||||
// This reduces pressure to allocate new SS when other threads freed blocks.
|
||||
// Phase 6: Registry Optimization - Use per-class registry for O(class_size) scan
|
||||
if (!tls->ss) {
|
||||
// Phase 6: Use per-class registry (262K → ~10-100 entries per class!)
|
||||
extern SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
|
||||
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
|
||||
|
||||
const int scan_max = tiny_reg_scan_max();
|
||||
int reg_size = g_super_reg_class_size[class_idx];
|
||||
int scan_limit = (scan_max < reg_size) ? scan_max : reg_size;
|
||||
|
||||
for (int i = 0; i < scan_limit; i++) {
|
||||
SuperSlab* ss = g_super_reg_by_class[class_idx][i];
|
||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
|
||||
// Note: class_idx check is not needed (per-class registry!)
|
||||
|
||||
// Pick first slab with freelist (Box 4: adopt boundary helper)
|
||||
int reg_cap = ss_slabs_capacity(ss);
|
||||
for (int s = 0; s < reg_cap; s++) {
|
||||
if (ss->slabs[s].freelist) {
|
||||
if (adopt_bind_if_safe(tls, ss, s, class_idx)) return ss;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Must-adopt-before-mmap gate: attempt sticky/hot/bench/mailbox/registry small-window
|
||||
{
|
||||
SuperSlab* gate_ss = tiny_must_adopt_gate(class_idx, tls);
|
||||
if (gate_ss) return gate_ss;
|
||||
}
|
||||
|
||||
// Allocate new SuperSlab
|
||||
SuperSlab* ss = superslab_allocate((uint8_t)class_idx);
|
||||
if (!ss) {
|
||||
if (!g_superslab_refill_debug_once) {
|
||||
g_superslab_refill_debug_once = 1;
|
||||
int err = errno;
|
||||
|
||||
// CRITICAL FIX (BUG #11): Protect fprintf() with lock_depth
|
||||
// fprintf() can call malloc for buffering → must use libc malloc
|
||||
extern __thread int g_hakmem_lock_depth;
|
||||
g_hakmem_lock_depth++;
|
||||
|
||||
fprintf(stderr,
|
||||
"[DEBUG] superslab_refill returned NULL (OOM) detail: class=%d prev_ss=%p active=%u bitmap=0x%08x prev_meta=%p used=%u cap=%u slab_idx=%u reused_freelist=%d free_idx=%d errno=%d\n",
|
||||
class_idx,
|
||||
(void*)prev_ss,
|
||||
(unsigned)prev_active,
|
||||
prev_bitmap,
|
||||
(void*)prev_meta,
|
||||
(unsigned)prev_meta_used,
|
||||
(unsigned)prev_meta_cap,
|
||||
(unsigned)prev_slab_idx,
|
||||
reused_slabs,
|
||||
free_idx_attempted,
|
||||
err);
|
||||
|
||||
g_hakmem_lock_depth--;
|
||||
}
|
||||
// Clear errno to avoid confusion in fallback paths
|
||||
errno = 0;
|
||||
return NULL; // OOM
|
||||
}
|
||||
|
||||
// Initialize first slab
|
||||
uint32_t my_tid = tiny_self_u32();
|
||||
superslab_init_slab(ss, 0, g_tiny_class_sizes[class_idx], my_tid);
|
||||
superslab_init_slab(ss,
|
||||
slab_idx,
|
||||
g_tiny_class_sizes[class_idx],
|
||||
my_tid);
|
||||
|
||||
// Cache in unified TLS(前のSS参照を解放)
|
||||
SuperSlab* old = tls->ss;
|
||||
tiny_tls_bind_slab(tls, ss, 0);
|
||||
// Maintain refcount(将来の空回収に備え、TLS参照をカウント)
|
||||
superslab_ref_inc(ss);
|
||||
if (old && old != ss) {
|
||||
superslab_ref_dec(old);
|
||||
tiny_tls_bind_slab(tls, ss, slab_idx);
|
||||
|
||||
// Sanity: TLS must now describe this slab for this class.
|
||||
if (!(tls->ss == ss &&
|
||||
tls->slab_idx == slab_idx &&
|
||||
tls->meta != NULL &&
|
||||
tls->meta->class_idx == (uint8_t)class_idx)) {
|
||||
tls->ss = NULL;
|
||||
tls->meta = NULL;
|
||||
tls->slab_idx = -1;
|
||||
tls->slab_base = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ss;
|
||||
}
|
||||
|
||||
// Phase 6.24: SuperSlab-based allocation (TLS unified, Medium fix)
|
||||
static inline void* hak_tiny_alloc_superslab(int class_idx) {
|
||||
// DEBUG: Function entry trace (gated to avoid ring spam)
|
||||
do {
|
||||
static int g_alloc_ring = -1;
|
||||
if (__builtin_expect(g_alloc_ring == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_ALLOC_RING");
|
||||
g_alloc_ring = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (g_alloc_ring) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_ENTER, 0x01, (void*)(uintptr_t)class_idx, 0);
|
||||
}
|
||||
} while (0);
|
||||
// ============================================================================
|
||||
// Phase 6.24: SuperSlab-based allocation using TLS slab
|
||||
// ============================================================================
|
||||
|
||||
// MidTC fast path: 128..1024B(class>=4)はTLS tcacheを最優先
|
||||
static inline void* hak_tiny_alloc_superslab(int class_idx) {
|
||||
// MidTC fast path
|
||||
do {
|
||||
void* mp = midtc_pop(class_idx);
|
||||
if (mp) {
|
||||
@ -574,142 +207,83 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) {
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// Phase 6.24: 1 TLS read (down from 3)
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
int slab_idx = tls->slab_idx;
|
||||
|
||||
if (meta && slab_idx >= 0 && tls->ss) {
|
||||
// CRITICAL: Verify class consistency BEFORE using tls->ss
|
||||
// If tls->ss->size_class != class_idx, unbind and refill
|
||||
if (tls->ss->size_class != class_idx) {
|
||||
// Class mismatch: TLS is bound to wrong SuperSlab
|
||||
// This happens when TLS was previously bound to different class
|
||||
// Ensure TLS metadata matches class and slab base
|
||||
if (tls->meta->class_idx != (uint8_t)class_idx) {
|
||||
tls->ss = NULL;
|
||||
tls->meta = NULL;
|
||||
tls->slab_idx = -1;
|
||||
tls->slab_base = NULL;
|
||||
meta = NULL; // Force refill path below
|
||||
meta = NULL;
|
||||
} else {
|
||||
// Ensure TLS view is consistent with canonical slab_base
|
||||
uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
|
||||
if (tls->slab_base != canonical) {
|
||||
tls->slab_base = canonical;
|
||||
}
|
||||
}
|
||||
// A/B: Relaxed read for remote head presence check
|
||||
static int g_alloc_remote_relax = -1; // env: HAKMEM_TINY_ALLOC_REMOTE_RELAX=1 → relaxed
|
||||
if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
|
||||
g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
uintptr_t pending = atomic_load_explicit(&tls->ss->remote_heads[slab_idx],
|
||||
g_alloc_remote_relax ? memory_order_relaxed
|
||||
: memory_order_acquire);
|
||||
if (__builtin_expect(pending != 0, 0)) {
|
||||
uint32_t self_tid = tiny_self_u32();
|
||||
if (ss_owner_try_acquire(meta, self_tid)) {
|
||||
_ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
|
||||
|
||||
// Drain remote if needed (ownership-checked elsewhere)
|
||||
if (meta) {
|
||||
static int g_alloc_remote_relax = -1;
|
||||
if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
|
||||
g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
uintptr_t pending = atomic_load_explicit(
|
||||
&tls->ss->remote_heads[slab_idx],
|
||||
g_alloc_remote_relax ? memory_order_relaxed : memory_order_acquire);
|
||||
if (__builtin_expect(pending != 0, 0)) {
|
||||
uint32_t self_tid = tiny_self_u32();
|
||||
if (ss_owner_try_acquire(meta, self_tid)) {
|
||||
_ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FIX #2 DELETED (Race condition fix):
|
||||
// Previous drain-all-slabs without ownership caused concurrent freelist corruption.
|
||||
// Problem: Thread A owns slab 5, Thread B drains all slabs including 5 → both modify freelist → crash.
|
||||
// Ownership protocol: MUST bind+owner_cas BEFORE drain (see Fix #3 in tiny_refill.h).
|
||||
// Remote frees will be drained when the slab is adopted via refill paths.
|
||||
|
||||
// Fast path: Direct metadata access (no repeated TLS reads!)
|
||||
// Fast path: linear carve from current TLS slab
|
||||
if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
|
||||
// Box 3: Get stride
|
||||
size_t block_size = tiny_stride_for_class(tls->ss->size_class);
|
||||
uint8_t* base = tls->slab_base; // tls_slab_base は tiny_slab_base_for_geometry(ss, slab_idx) 由来(唯一の真実)
|
||||
|
||||
// ULTRATHINK DEBUG: Capture the 53-byte mystery
|
||||
if (tiny_refill_failfast_level() >= 3 && tls->ss->size_class == 7 && slab_idx == 0) {
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] === Before allocation ===\n");
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] ss=%p, slab_idx=%d, class=%d\n",
|
||||
tls->ss, slab_idx, tls->ss->size_class);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] block_size=%zu, meta->used=%d, meta->capacity=%d\n",
|
||||
block_size, meta->used, meta->capacity);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] tls->slab_base=%p\n", base);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] tiny_slab_base_for(ss,%d)=%p\n",
|
||||
slab_idx, tiny_slab_base_for(tls->ss, slab_idx));
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] sizeof(SuperSlab)=%zu\n", sizeof(SuperSlab));
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] Expected base should be: ss + %zu\n", sizeof(SuperSlab));
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] Actual base is: ss + 1024\n");
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] Base error: %zu - 1024 = %zu bytes\n",
|
||||
sizeof(SuperSlab), sizeof(SuperSlab) - 1024);
|
||||
}
|
||||
|
||||
void* block = (void*)(base + ((size_t)meta->used * block_size));
|
||||
|
||||
// ULTRATHINK DEBUG: After calculation
|
||||
if (tiny_refill_failfast_level() >= 3 && tls->ss->size_class == 7 && slab_idx == 0) {
|
||||
size_t offset_from_ss = (uintptr_t)block - (uintptr_t)tls->ss;
|
||||
size_t expected_offset = 1024 + ((size_t)meta->used * block_size);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] === Calculated block address ===\n");
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] block=%p\n", block);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] offset from ss=%zu (0x%zx)\n", offset_from_ss, offset_from_ss);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] expected offset=%zu (0x%zx)\n", expected_offset, expected_offset);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] difference=%zd bytes\n",
|
||||
(ssize_t)offset_from_ss - (ssize_t)expected_offset);
|
||||
}
|
||||
|
||||
size_t block_size = tiny_stride_for_class(meta->class_idx);
|
||||
uint8_t* base = tls->slab_base;
|
||||
void* block = base + ((size_t)meta->used * block_size);
|
||||
meta->used++;
|
||||
|
||||
// Fail-Fast: self-check(デバッグ時のみ有効)
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
uintptr_t base_ss = (uintptr_t)tls->ss;
|
||||
size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
|
||||
uintptr_t limit_ss = base_ss + ss_size;
|
||||
uintptr_t p = (uintptr_t)block;
|
||||
size_t off = (p >= base_ss) ? (size_t)(p - base_ss) : 0;
|
||||
int in_range = (p >= base_ss) && (p < limit_ss);
|
||||
int in_range = (p >= base_ss) && (p < base_ss + ss_size);
|
||||
int aligned = ((p - (uintptr_t)base) % block_size) == 0;
|
||||
int idx_ok = (tls->slab_idx >= 0) && (tls->slab_idx < ss_slabs_capacity(tls->ss));
|
||||
if (!in_range || !aligned || !idx_ok || meta->used > (uint32_t)meta->capacity) {
|
||||
// Diagnostic log before abort
|
||||
fprintf(stderr, "[ALLOC_CARVE_BUG] cls=%u slab=%d used=%u cap=%u base=%p bs=%zu ptr=%p offset=%zu\n",
|
||||
tls->ss->size_class, tls->slab_idx, meta->used, meta->capacity,
|
||||
(void*)base, block_size, block, off);
|
||||
fprintf(stderr, "[ALLOC_CARVE_BUG] in_range=%d aligned=%d idx_ok=%d used_check=%d\n",
|
||||
in_range, aligned, idx_ok, meta->used > (uint32_t)meta->capacity);
|
||||
fflush(stderr);
|
||||
|
||||
int idx_ok = (tls->slab_idx >= 0) &&
|
||||
(tls->slab_idx < ss_slabs_capacity(tls->ss));
|
||||
if (!in_range || !aligned || !idx_ok || meta->used > meta->capacity) {
|
||||
tiny_failfast_abort_ptr("alloc_ret_align",
|
||||
tls->ss,
|
||||
tls->slab_idx,
|
||||
block,
|
||||
!in_range ? "out_of_range"
|
||||
: (!aligned ? "misaligned"
|
||||
: (!idx_ok ? "bad_slab_idx"
|
||||
: "over_capacity")));
|
||||
"superslab_tls_invariant");
|
||||
}
|
||||
}
|
||||
|
||||
// Track active blocks in SuperSlab for conservative reclamation
|
||||
ss_active_inc(tls->ss);
|
||||
// Route: slab linear
|
||||
ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
|
||||
HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead
|
||||
HAK_RET_ALLOC(class_idx, block);
|
||||
}
|
||||
|
||||
// Freelist path from current TLS slab
|
||||
if (meta && meta->freelist) {
|
||||
// Freelist allocation
|
||||
void* block = meta->freelist;
|
||||
// Safety: bounds/alignment check (debug)
|
||||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||||
// Box 3: Get stride and slab base for validation
|
||||
size_t blk = tiny_stride_for_class(tls->ss->size_class);
|
||||
size_t blk = tiny_stride_for_class(meta->class_idx);
|
||||
uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
|
||||
int align_ok = ((delta % blk) == 0);
|
||||
int range_ok = (delta / blk) < meta->capacity;
|
||||
if (!align_ok || !range_ok) {
|
||||
uintptr_t info = ((uintptr_t)(align_ok ? 1u : 0u) << 32) | (uint32_t)(range_ok ? 1u : 0u);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)tls->ss->size_class, block, info | 0xA100u);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
|
||||
return NULL;
|
||||
}
|
||||
@ -717,71 +291,33 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) {
|
||||
void* next = tiny_next_read(class_idx, block);
|
||||
meta->freelist = next;
|
||||
meta->used++;
|
||||
// Optional: clear freelist bit when becomes empty
|
||||
do {
|
||||
static int g_mask_en = -1;
|
||||
if (__builtin_expect(g_mask_en == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
|
||||
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
|
||||
}
|
||||
if (__builtin_expect(g_mask_en, 0) && next == NULL) {
|
||||
uint32_t bit = (1u << slab_idx);
|
||||
atomic_fetch_and_explicit(&tls->ss->freelist_mask, ~bit, memory_order_release);
|
||||
}
|
||||
} while (0);
|
||||
// Track active blocks in SuperSlab for conservative reclamation
|
||||
ss_active_inc(tls->ss);
|
||||
// Route: slab freelist
|
||||
ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
|
||||
HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead
|
||||
HAK_RET_ALLOC(class_idx, block);
|
||||
}
|
||||
|
||||
// Slow path: Refill TLS slab
|
||||
// Slow path: acquire a new slab via shared pool
|
||||
SuperSlab* ss = superslab_refill(class_idx);
|
||||
if (!ss) {
|
||||
static int log_oom = 0;
|
||||
if (log_oom < 2) { fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n"); log_oom++; }
|
||||
return NULL; // OOM
|
||||
if (log_oom < 2) {
|
||||
fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n");
|
||||
log_oom++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Retry allocation (metadata already cached in superslab_refill)
|
||||
// Retry after refill
|
||||
tls = &g_tls_slabs[class_idx];
|
||||
meta = tls->meta;
|
||||
|
||||
// DEBUG: Check each condition (disabled for benchmarks)
|
||||
// static int log_retry = 0;
|
||||
// if (log_retry < 2) {
|
||||
// fprintf(stderr, "[DEBUG] Retry alloc: meta=%p, freelist=%p, used=%u, capacity=%u, slab_base=%p\n",
|
||||
// (void*)meta, meta ? meta->freelist : NULL,
|
||||
// meta ? meta->used : 0, meta ? meta->capacity : 0,
|
||||
// (void*)tls->slab_base);
|
||||
// log_retry++;
|
||||
// }
|
||||
|
||||
if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
|
||||
// Box 3: Get stride and calculate block address
|
||||
size_t block_size = tiny_stride_for_class(ss->size_class);
|
||||
if (meta && meta->freelist == NULL &&
|
||||
meta->used < meta->capacity && tls->slab_base) {
|
||||
size_t block_size = tiny_stride_for_class(meta->class_idx);
|
||||
void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
|
||||
|
||||
// Disabled for benchmarks
|
||||
// static int log_success = 0;
|
||||
// if (log_success < 2) {
|
||||
// fprintf(stderr, "[DEBUG] Superslab alloc SUCCESS: ptr=%p, class=%d, used=%u->%u\n",
|
||||
// block, class_idx, meta->used, meta->used + 1);
|
||||
// log_success++;
|
||||
// }
|
||||
|
||||
meta->used++;
|
||||
|
||||
// Track active blocks in SuperSlab for conservative reclamation
|
||||
ss_active_inc(ss);
|
||||
HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead
|
||||
HAK_RET_ALLOC(class_idx, block);
|
||||
}
|
||||
|
||||
// Disabled for benchmarks
|
||||
// static int log_fail = 0;
|
||||
// if (log_fail < 2) {
|
||||
// fprintf(stderr, "[DEBUG] Retry alloc FAILED - returning NULL\n");
|
||||
// log_fail++;
|
||||
// }
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -26,14 +26,15 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
uintptr_t ss_base = (uintptr_t)ss;
|
||||
if (__builtin_expect(slab_idx < 0, 0)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xFFu, ptr, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
uint8_t cls = meta->class_idx;
|
||||
|
||||
// Debug: Log first C7 alloc/free for path verification
|
||||
if (ss->size_class == 7) {
|
||||
if (cls == 7) {
|
||||
static _Atomic int c7_free_count = 0;
|
||||
int count = atomic_fetch_add_explicit(&c7_free_count, 1, memory_order_relaxed);
|
||||
if (count == 0) {
|
||||
@ -45,20 +46,20 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
|
||||
tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
|
||||
extern __thread TinyTLSSlab g_tls_slabs[];
|
||||
tiny_alloc_dump_tls_state(ss->size_class, "watch_free_enter", &g_tls_slabs[ss->size_class]);
|
||||
tiny_alloc_dump_tls_state(cls, "watch_free_enter", &g_tls_slabs[cls]);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
extern __thread TinyTLSMag g_tls_mags[];
|
||||
TinyTLSMag* watch_mag = &g_tls_mags[ss->size_class];
|
||||
TinyTLSMag* watch_mag = &g_tls_mags[cls];
|
||||
fprintf(stderr,
|
||||
"[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n",
|
||||
ss->size_class,
|
||||
cls,
|
||||
watch_mag->top,
|
||||
watch_mag->cap);
|
||||
#endif
|
||||
}
|
||||
// BUGFIX: Validate size_class before using as array index (prevents OOB)
|
||||
if (__builtin_expect(ss->size_class < 0 || ss->size_class >= TINY_NUM_CLASSES, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF1, ptr, (uintptr_t)ss->size_class);
|
||||
// BUGFIX (Phase 12): Validate class_idx before using as array index
|
||||
if (__builtin_expect(cls >= TINY_NUM_CLASSES, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF1, ptr, (uintptr_t)cls);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
@ -67,7 +68,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
// Expected impact: -10~-15% CPU (eliminates O(n) duplicate scan)
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[ss->size_class];
|
||||
size_t blk = g_tiny_class_sizes[cls];
|
||||
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
|
||||
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
|
||||
int cap_ok = (meta->capacity > 0) ? 1 : 0;
|
||||
@ -78,17 +79,17 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
if (align_ok) code |= 0x2u;
|
||||
if (range_ok) code |= 0x1u;
|
||||
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
// Duplicate in freelist (best-effort scan up to 64)
|
||||
// NOTE: This O(n) scan is VERY expensive (can scan 64 pointers per free!)
|
||||
void* scan = meta->freelist; int scanned = 0; int dup = 0;
|
||||
while (scan && scanned < 64) { if (scan == base) { dup = 1; break; } scan = tiny_next_read(ss->size_class, scan); scanned++; }
|
||||
while (scan && scanned < 64) { if (scan == base) { dup = 1; break; } scan = tiny_next_read(cls, scan); scanned++; }
|
||||
if (dup) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
@ -97,8 +98,8 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
|
||||
// Phase E1-CORRECT: C7 now has headers like other classes
|
||||
// Validation must check base pointer (ptr-1) alignment, not user pointer
|
||||
if (__builtin_expect(ss->size_class == 7, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[ss->size_class];
|
||||
if (__builtin_expect(cls == 7, 0)) {
|
||||
size_t blk = g_tiny_class_sizes[cls];
|
||||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||||
uintptr_t delta = (uintptr_t)base - (uintptr_t)slab_base;
|
||||
int cap_ok = (meta->capacity > 0) ? 1 : 0;
|
||||
@ -106,7 +107,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
int range_ok = cap_ok && (delta / blk) < meta->capacity;
|
||||
if (!align_ok || !range_ok) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA107u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Debug build: Print diagnostic info before failing
|
||||
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] ptr=%p base=%p slab_base=%p\n", ptr, base, (void*)slab_base);
|
||||
@ -122,25 +123,26 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 6.23: Same-thread check
|
||||
// Phase 6.23: Same-thread check (Phase 12: owner_tid_low)
|
||||
uint32_t my_tid = tiny_self_u32();
|
||||
uint8_t my_tid_low = (uint8_t)my_tid;
|
||||
const int debug_guard = g_debug_remote_guard;
|
||||
static __thread int g_debug_free_count = 0;
|
||||
// If owner is not set yet, claim ownership to avoid spurious remote path in 1T
|
||||
if (!g_tiny_force_remote && meta->owner_tid == 0) {
|
||||
meta->owner_tid = my_tid;
|
||||
// If owner is not set yet, claim ownership (low 8 bits) to avoid spurious remote path in 1T
|
||||
if (!g_tiny_force_remote && meta->owner_tid_low == 0) {
|
||||
meta->owner_tid_low = my_tid_low;
|
||||
}
|
||||
if (!g_tiny_force_remote && meta->owner_tid != 0 && meta->owner_tid == my_tid) {
|
||||
if (!g_tiny_force_remote && meta->owner_tid_low != 0 && meta->owner_tid_low == my_tid_low) {
|
||||
ROUTE_MARK(17); // free_same_thread
|
||||
// Fast path: Direct freelist push (same-thread)
|
||||
if (0 && debug_guard && g_debug_free_count < 1) {
|
||||
fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n",
|
||||
meta->owner_tid, my_tid);
|
||||
meta->owner_tid_low, my_tid);
|
||||
g_debug_free_count++;
|
||||
}
|
||||
if (__builtin_expect(meta->used == 0, 0)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
@ -150,13 +152,13 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
int transitioned = tiny_free_remote_box(ss, slab_idx, meta, base, my_tid);
|
||||
if (transitioned) {
|
||||
extern unsigned long long g_remote_free_transitions[];
|
||||
g_remote_free_transitions[ss->size_class]++;
|
||||
g_remote_free_transitions[cls]++;
|
||||
// Free-side route: remote transition observed
|
||||
do {
|
||||
static int g_route_free = -1; if (__builtin_expect(g_route_free == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_ROUTE_FREE");
|
||||
g_route_free = (e && *e && *e != '0') ? 1 : 0; }
|
||||
if (g_route_free) route_free_commit((int)ss->size_class, (1ull<<18), 0xE2);
|
||||
if (g_route_free) route_free_commit((int)cls, (1ull<<18), 0xE2);
|
||||
} while (0);
|
||||
}
|
||||
return;
|
||||
@ -169,9 +171,9 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
g_free_to_ss = (e && *e && *e != '0') ? 1 : 0; // default OFF
|
||||
}
|
||||
if (!g_free_to_ss) {
|
||||
int cls = (int)ss->size_class;
|
||||
if (midtc_enabled() && cls >= 4) {
|
||||
if (midtc_push(cls, base)) {
|
||||
int mid_cls = (int)cls;
|
||||
if (midtc_enabled() && mid_cls >= 4) {
|
||||
if (midtc_push(mid_cls, base)) {
|
||||
// Treat as returned to TLS cache (not SS freelist)
|
||||
meta->used--;
|
||||
ss_active_dec_one(ss);
|
||||
@ -188,39 +190,43 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
if (prev_before == NULL) {
|
||||
ROUTE_MARK(19); // first_free_transition
|
||||
extern unsigned long long g_first_free_transitions[];
|
||||
g_first_free_transitions[ss->size_class]++;
|
||||
g_first_free_transitions[cls]++;
|
||||
ROUTE_MARK(20); // mailbox_publish
|
||||
// Free-side route commit (one-shot)
|
||||
do {
|
||||
static int g_route_free = -1; if (__builtin_expect(g_route_free == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_TINY_ROUTE_FREE");
|
||||
g_route_free = (e && *e && *e != '0') ? 1 : 0; }
|
||||
int cls = (int)ss->size_class;
|
||||
if (g_route_free) route_free_commit(cls, (1ull<<19) | (1ull<<20), 0xE1);
|
||||
} while (0);
|
||||
}
|
||||
|
||||
if (__builtin_expect(debug_guard, 0)) {
|
||||
fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
|
||||
ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, prev_before, meta->used);
|
||||
fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
|
||||
cls, slab_idx, meta->owner_tid_low, my_tid, ptr, prev_before, meta->used);
|
||||
}
|
||||
|
||||
// 空検出は別途(ホットパス除外)
|
||||
// Phase 12: slab empty → shared pool に返却
|
||||
if (meta->used == 0) {
|
||||
extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
|
||||
shared_pool_release_slab(ss, slab_idx);
|
||||
}
|
||||
} else {
|
||||
ROUTE_MARK(18); // free_remote_transition
|
||||
if (__builtin_expect(meta->owner_tid == my_tid && meta->owner_tid == 0, 0)) {
|
||||
if (__builtin_expect(meta->owner_tid_low == my_tid_low && meta->owner_tid_low == 0, 0)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
if (debug_guard) {
|
||||
fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n",
|
||||
ss->size_class, slab_idx, ptr, my_tid, (unsigned)meta->used);
|
||||
cls, slab_idx, ptr, my_tid, (unsigned)meta->used);
|
||||
}
|
||||
}
|
||||
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid);
|
||||
// Slow path: Remote free (cross-thread)
|
||||
if (0 && debug_guard && g_debug_free_count < 5) {
|
||||
fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n",
|
||||
meta->owner_tid, my_tid, slab_idx);
|
||||
meta->owner_tid_low, my_tid, slab_idx);
|
||||
g_debug_free_count++;
|
||||
}
|
||||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||||
@ -232,7 +238,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
while (cur && scanned < 64) {
|
||||
if ((cur < base) || (cur >= base + ss_size)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA200u, base, ss_size, cur);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
break;
|
||||
}
|
||||
@ -240,20 +246,21 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
if (__builtin_expect(g_remote_side_enable, 0)) {
|
||||
if (!tiny_remote_sentinel_ok((void*)cur)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, cur);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
|
||||
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed);
|
||||
tiny_remote_report_corruption("scan", (void*)cur, observed);
|
||||
if (__builtin_expect(g_debug_remote_guard, 0)) {
|
||||
fprintf(stderr,
|
||||
"[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n",
|
||||
ss->size_class,
|
||||
cls,
|
||||
slab_idx,
|
||||
(void*)cur,
|
||||
(void*)head,
|
||||
ptr,
|
||||
scanned,
|
||||
observed,
|
||||
meta->owner_tid,
|
||||
meta->owner_tid_low,
|
||||
(unsigned)meta->used,
|
||||
meta->freelist,
|
||||
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
|
||||
@ -265,24 +272,24 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
} else {
|
||||
if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA201u, base, ss_size, cur);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
break;
|
||||
}
|
||||
cur = (uintptr_t)tiny_next_read(ss->size_class, (void*)cur);
|
||||
cur = (uintptr_t)tiny_next_read(cls, (void*)cur);
|
||||
}
|
||||
scanned++;
|
||||
}
|
||||
if (dup) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (__builtin_expect(meta->used == 0, 0)) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||||
return;
|
||||
}
|
||||
@ -310,9 +317,9 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
// Use remote queue
|
||||
uintptr_t head_word = __atomic_load_n((uintptr_t*)base, __ATOMIC_RELAXED);
|
||||
if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
|
||||
ss->size_class,
|
||||
cls,
|
||||
slab_idx,
|
||||
meta->owner_tid,
|
||||
meta->owner_tid_low,
|
||||
my_tid,
|
||||
ptr,
|
||||
(unsigned)meta->used,
|
||||
@ -330,13 +337,13 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_remote_watch_mark(ptr, "dup_prevent", my_tid);
|
||||
tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
tiny_failfast_abort_ptr("double_free_remote", ss, slab_idx, ptr, "remote_side_contains");
|
||||
}
|
||||
if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) {
|
||||
// TLS guard scribble detected on the node's first word → same-pointer double free across routes
|
||||
uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
|
||||
tiny_remote_watch_mark(ptr, "pre_push", my_tid);
|
||||
tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0);
|
||||
tiny_remote_report_corruption("pre_push", ptr, head_word);
|
||||
@ -350,16 +357,17 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
// ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
|
||||
if (was_empty) {
|
||||
extern unsigned long long g_remote_free_transitions[];
|
||||
g_remote_free_transitions[ss->size_class]++;
|
||||
ss_partial_publish((int)ss->size_class, ss);
|
||||
g_remote_free_transitions[cls]++;
|
||||
g_remote_free_transitions[cls]++;
|
||||
ss_partial_publish((int)cls, ss);
|
||||
}
|
||||
} else {
|
||||
// Fallback: direct freelist push (legacy)
|
||||
if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
|
||||
void* prev = meta->freelist;
|
||||
tiny_next_write(ss->size_class, base, prev);
|
||||
tiny_next_write(cls, base, prev);
|
||||
meta->freelist = base;
|
||||
tiny_failfast_log("free_local_legacy", ss->size_class, ss, meta, ptr, prev);
|
||||
tiny_failfast_log("free_local_legacy", cls, ss, meta, ptr, prev);
|
||||
do {
|
||||
static int g_mask_en = -1;
|
||||
if (__builtin_expect(g_mask_en == -1, 0)) {
|
||||
@ -374,10 +382,15 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
|
||||
meta->used--;
|
||||
ss_active_dec_one(ss);
|
||||
if (prev == NULL) {
|
||||
ss_partial_publish((int)ss->size_class, ss);
|
||||
ss_partial_publish((int)cls, ss);
|
||||
}
|
||||
}
|
||||
|
||||
// 空検出は別途(ホットパス除外)
|
||||
// Phase 12: slab empty → shared pool に返却
|
||||
if (meta->used == 0) {
|
||||
extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
|
||||
shared_pool_release_slab(ss, slab_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user