2025-11-05 12:31:14 +09:00
|
|
|
|
// hakmem_tiny_lifecycle.inc
|
|
|
|
|
|
// Phase 2D-3: Lifecycle management functions extraction
|
|
|
|
|
|
//
|
|
|
|
|
|
// This file contains lifecycle management functions extracted from hakmem_tiny.c
|
|
|
|
|
|
// to improve code organization. Reduces main file by ~226 lines (16%).
|
|
|
|
|
|
//
|
|
|
|
|
|
// Functions:
|
|
|
|
|
|
// - hak_tiny_trim(): Trim and cleanup operations
|
|
|
|
|
|
// - tiny_tls_cache_drain(): TLS cache draining
|
|
|
|
|
|
// - tiny_apply_mem_diet(): Memory diet mode application
|
|
|
|
|
|
//
|
|
|
|
|
|
// Cold/maintenance path - not performance critical.
|
|
|
|
|
|
#include "tiny_tls_guard.h"
|
2025-11-20 02:01:52 +09:00
|
|
|
|
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
2025-11-13 16:33:03 +09:00
|
|
|
|
// Phase 12: Helper to derive a representative class index for a SuperSlab
|
|
|
|
|
|
// from per-slab metadata (all slabs are empty when used in trim).
|
|
|
|
|
|
static inline int superslab_any_class_idx(SuperSlab* ss) {
|
|
|
|
|
|
if (!ss) return -1;
|
|
|
|
|
|
int cap = ss_slabs_capacity(ss);
|
|
|
|
|
|
for (int s = 0; s < cap; s++) {
|
2025-11-20 02:01:52 +09:00
|
|
|
|
uint8_t cls = ss_slab_meta_class_idx_get(ss, s);
|
2025-11-13 16:33:03 +09:00
|
|
|
|
if (cls < TINY_NUM_CLASSES) return (int)cls;
|
|
|
|
|
|
}
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-05 12:31:14 +09:00
|
|
|
|
void hak_tiny_trim(void) {
|
2025-11-07 01:27:04 +09:00
|
|
|
|
static _Atomic int g_trim_call_count = 0;
|
|
|
|
|
|
int call_count = atomic_fetch_add_explicit(&g_trim_call_count, 1, memory_order_relaxed);
|
|
|
|
|
|
if (call_count < 5) { // First 5 calls only
|
|
|
|
|
|
fprintf(stderr, "[DEBUG hak_tiny_trim] Call #%d\n", call_count + 1);
|
|
|
|
|
|
}
|
2025-11-05 12:31:14 +09:00
|
|
|
|
if (!g_tiny_initialized) return;
|
|
|
|
|
|
// Lazy init for SS reserve env
|
|
|
|
|
|
if (__builtin_expect(g_empty_reserve, 1) == -1) {
|
|
|
|
|
|
char* er = getenv("HAKMEM_TINY_SS_RESERVE");
|
|
|
|
|
|
int v = (er ? atoi(er) : EMPTY_SUPERSLAB_RESERVE);
|
|
|
|
|
|
if (v < 0) {
|
|
|
|
|
|
v = 0;
|
|
|
|
|
|
} else if (v > 4) {
|
|
|
|
|
|
v = 4; // guardrails
|
|
|
|
|
|
}
|
|
|
|
|
|
g_empty_reserve = v;
|
|
|
|
|
|
}
|
|
|
|
|
|
for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
|
|
|
|
|
|
tiny_tls_cache_drain(class_idx);
|
|
|
|
|
|
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
|
|
|
|
|
|
pthread_mutex_lock(lock);
|
|
|
|
|
|
TinySlab** head = &g_tiny_pool.free_slabs[class_idx];
|
|
|
|
|
|
TinySlab* prev = NULL;
|
|
|
|
|
|
TinySlab* slab = *head;
|
|
|
|
|
|
while (slab) {
|
|
|
|
|
|
TinySlab* next = slab->next;
|
|
|
|
|
|
if (slab->free_count == slab->total_count) {
|
|
|
|
|
|
if (prev) prev->next = next; else *head = next;
|
|
|
|
|
|
release_slab(slab);
|
|
|
|
|
|
slab = next;
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
prev = slab;
|
|
|
|
|
|
slab = next;
|
|
|
|
|
|
}
|
|
|
|
|
|
pthread_mutex_unlock(lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Optional: attempt SuperSlab reclamation for completely empty SS (conservative)
|
|
|
|
|
|
static int g_trim_ss_enabled = -1;
|
|
|
|
|
|
static int g_ss_partial_env = -1;
|
|
|
|
|
|
if (g_trim_ss_enabled == -1) {
|
|
|
|
|
|
char* env = getenv("HAKMEM_TINY_TRIM_SS");
|
|
|
|
|
|
if (env) {
|
|
|
|
|
|
g_trim_ss_enabled = (atoi(env) != 0) ? 1 : 0;
|
|
|
|
|
|
} else {
|
|
|
|
|
|
g_trim_ss_enabled = 1; // default ON for better memory efficiency
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
if (g_ss_partial_env == -1) {
|
|
|
|
|
|
char* env = getenv("HAKMEM_TINY_SS_PARTIAL");
|
|
|
|
|
|
if (env) {
|
|
|
|
|
|
g_ss_partial_enable = (atoi(env) != 0) ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
char* interval = getenv("HAKMEM_TINY_SS_PARTIAL_INTERVAL");
|
|
|
|
|
|
if (interval) {
|
|
|
|
|
|
int v = atoi(interval);
|
|
|
|
|
|
if (v < 1) v = 1;
|
|
|
|
|
|
g_ss_partial_interval = (uint32_t)v;
|
|
|
|
|
|
}
|
|
|
|
|
|
g_ss_partial_env = 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!g_trim_ss_enabled) return;
|
|
|
|
|
|
|
|
|
|
|
|
uint32_t partial_epoch = 0;
|
|
|
|
|
|
if (g_ss_partial_enable) {
|
|
|
|
|
|
partial_epoch = atomic_fetch_add_explicit(&g_ss_partial_epoch, 1u, memory_order_relaxed) + 1u;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Walk the registry and collect empty SuperSlabs by class
|
|
|
|
|
|
for (int i = 0; i < SUPER_REG_SIZE; i++) {
|
|
|
|
|
|
SuperRegEntry* e = &g_super_reg[i];
|
|
|
|
|
|
uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
|
|
|
|
|
|
if (base == 0) continue;
|
|
|
|
|
|
SuperSlab* ss = e->ss;
|
|
|
|
|
|
if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
|
|
|
|
|
|
// Only consider completely empty SuperSlabs
|
2025-11-07 01:27:04 +09:00
|
|
|
|
uint32_t active = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
|
|
|
|
|
|
static _Atomic int g_debug_ss_scan = 0;
|
|
|
|
|
|
int scan_count = atomic_fetch_add_explicit(&g_debug_ss_scan, 1, memory_order_relaxed);
|
|
|
|
|
|
if (scan_count < 20) { // First 20 SS scans
|
2025-11-13 16:33:03 +09:00
|
|
|
|
int log_cls = superslab_any_class_idx(ss);
|
2025-11-07 01:27:04 +09:00
|
|
|
|
fprintf(stderr, "[DEBUG trim scan] ss=%p class=%d active=%u\n",
|
2025-11-13 16:33:03 +09:00
|
|
|
|
(void*)ss, log_cls, active);
|
2025-11-07 01:27:04 +09:00
|
|
|
|
}
|
|
|
|
|
|
if (active != 0) continue;
|
2025-11-13 16:33:03 +09:00
|
|
|
|
int k = superslab_any_class_idx(ss);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
if (k < 0 || k >= TINY_NUM_CLASSES) continue;
|
|
|
|
|
|
// Do not free if current thread still caches this SS in TLS
|
|
|
|
|
|
if (g_tls_slabs[k].ss == ss) continue;
|
|
|
|
|
|
// Keep up to EMPTY_SUPERSLAB_RESERVE per class as reserve; free extras
|
|
|
|
|
|
pthread_mutex_lock(&g_empty_lock);
|
|
|
|
|
|
if (g_empty_reserve == 0) {
|
|
|
|
|
|
pthread_mutex_unlock(&g_empty_lock);
|
|
|
|
|
|
if (superslab_ref_get(ss) == 0) {
|
|
|
|
|
|
superslab_free(ss);
|
|
|
|
|
|
}
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (g_empty_superslabs[k] == NULL) {
|
|
|
|
|
|
g_empty_superslabs[k] = ss;
|
|
|
|
|
|
g_empty_counts[k] = 1;
|
|
|
|
|
|
superslab_partial_release(ss, partial_epoch);
|
|
|
|
|
|
pthread_mutex_unlock(&g_empty_lock);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
// If same as reserved, nothing to do
|
|
|
|
|
|
if (g_empty_superslabs[k] == ss) {
|
|
|
|
|
|
superslab_partial_release(ss, partial_epoch);
|
|
|
|
|
|
pthread_mutex_unlock(&g_empty_lock);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
int can_free = (g_empty_counts[k] >= g_empty_reserve);
|
|
|
|
|
|
if (!can_free) {
|
|
|
|
|
|
// Replace reserve with this newer SS
|
|
|
|
|
|
g_empty_superslabs[k] = ss;
|
|
|
|
|
|
g_empty_counts[k] = 1;
|
|
|
|
|
|
superslab_partial_release(ss, partial_epoch);
|
|
|
|
|
|
pthread_mutex_unlock(&g_empty_lock);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
pthread_mutex_unlock(&g_empty_lock);
|
|
|
|
|
|
// Free outside of the empty_lock(保守的: refcount==0 のときのみ)
|
|
|
|
|
|
if (superslab_ref_get(ss) == 0) {
|
|
|
|
|
|
superslab_free(ss);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void tiny_tls_cache_drain(int class_idx) {
|
|
|
|
|
|
TinyTLSList* tls = &g_tls_lists[class_idx];
|
|
|
|
|
|
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
|
// Phase E1-CORRECT: Drain TLS SLL cache for ALL classes
|
|
|
|
|
|
#include "box/tiny_next_ptr_box.h"
|
2025-11-20 07:32:30 +09:00
|
|
|
|
void* sll = g_tls_sll[class_idx].head;
|
|
|
|
|
|
g_tls_sll[class_idx].head = NULL;
|
|
|
|
|
|
g_tls_sll[class_idx].count = 0;
|
2025-11-05 12:31:14 +09:00
|
|
|
|
while (sll) {
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
|
void* next = tiny_next_read(class_idx, sll);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
tiny_tls_list_guard_push(class_idx, tls, sll);
|
2025-11-11 10:00:36 +09:00
|
|
|
|
tls_list_push(tls, sll, class_idx);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
sll = next;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
|
// Phase E1-CORRECT: Drain fast tier cache for ALL classes
|
|
|
|
|
|
void* fast = g_fast_head[class_idx];
|
2025-11-05 12:31:14 +09:00
|
|
|
|
g_fast_head[class_idx] = NULL;
|
|
|
|
|
|
g_fast_count[class_idx] = 0;
|
|
|
|
|
|
while (fast) {
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
|
void* next = tiny_next_read(class_idx, fast);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
tiny_tls_list_guard_push(class_idx, tls, fast);
|
2025-11-11 10:00:36 +09:00
|
|
|
|
tls_list_push(tls, fast, class_idx);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
fast = next;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Spill TLS list back to owners
|
|
|
|
|
|
void* head = NULL;
|
|
|
|
|
|
void* tail = NULL;
|
|
|
|
|
|
while (1) {
|
2025-11-11 10:00:36 +09:00
|
|
|
|
uint32_t taken = tls_list_bulk_take(tls, 0u, &head, &tail, class_idx);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
if (taken == 0u || head == NULL) break;
|
|
|
|
|
|
void* cur = head;
|
|
|
|
|
|
while (cur) {
|
Phase E3-FINAL: Fix Box API offset bugs - ALL classes now use correct offsets
## Root Cause Analysis (GPT5)
**Physical Layout Constraints**:
- Class 0: 8B = [1B header][7B payload] → offset 1 = 9B needed = ❌ IMPOSSIBLE
- Class 1-6: >=16B = [1B header][15B+ payload] → offset 1 = ✅ POSSIBLE
- Class 7: 1KB → offset 0 (compatibility)
**Correct Specification**:
- HAKMEM_TINY_HEADER_CLASSIDX != 0:
- Class 0, 7: next at offset 0 (overwrites header when on freelist)
- Class 1-6: next at offset 1 (after header)
- HAKMEM_TINY_HEADER_CLASSIDX == 0:
- All classes: next at offset 0
**Previous Bug**:
- Attempted "ALL classes offset 1" unification
- Class 0 with offset 1 caused immediate SEGV (9B > 8B block size)
- Mixed 2-arg/3-arg API caused confusion
## Fixes Applied
### 1. Restored 3-Argument Box API (core/box/tiny_next_ptr_box.h)
```c
// Correct signatures
void tiny_next_write(int class_idx, void* base, void* next_value)
void* tiny_next_read(int class_idx, const void* base)
// Correct offset calculation
size_t offset = (class_idx == 0 || class_idx == 7) ? 0 : 1;
```
### 2. Updated 123+ Call Sites Across 34 Files
- hakmem_tiny_hot_pop_v4.inc.h (4 locations)
- hakmem_tiny_fastcache.inc.h (3 locations)
- hakmem_tiny_tls_list.h (12 locations)
- superslab_inline.h (5 locations)
- tiny_fastcache.h (3 locations)
- ptr_trace.h (macro definitions)
- tls_sll_box.h (2 locations)
- + 27 additional files
Pattern: `tiny_next_read(base)` → `tiny_next_read(class_idx, base)`
Pattern: `tiny_next_write(base, next)` → `tiny_next_write(class_idx, base, next)`
### 3. Added Sentinel Detection Guards
- tiny_fast_push(): Block nodes with sentinel in ptr or ptr->next
- tls_list_push(): Block nodes with sentinel in ptr or ptr->next
- Defense-in-depth against remote free sentinel leakage
## Verification (GPT5 Report)
**Test Command**: `./out/release/bench_random_mixed_hakmem --iterations=70000`
**Results**:
- ✅ Main loop completed successfully
- ✅ Drain phase completed successfully
- ✅ NO SEGV (previous crash at iteration 66151 is FIXED)
- ℹ️ Final log: "tiny_alloc(1024) failed" is normal fallback to Mid/ACE layers
**Analysis**:
- Class 0 immediate SEGV: ✅ RESOLVED (correct offset 0 now used)
- 66K iteration crash: ✅ RESOLVED (offset consistency fixed)
- Box API conflicts: ✅ RESOLVED (unified 3-arg API)
## Technical Details
### Offset Logic Justification
```
Class 0: 8B block → next pointer (8B) fits ONLY at offset 0
Class 1: 16B block → next pointer (8B) fits at offset 1 (after 1B header)
Class 2: 32B block → next pointer (8B) fits at offset 1
...
Class 6: 512B block → next pointer (8B) fits at offset 1
Class 7: 1024B block → offset 0 for legacy compatibility
```
### Files Modified (Summary)
- Core API: `box/tiny_next_ptr_box.h`
- Hot paths: `hakmem_tiny_hot_pop*.inc.h`, `tiny_fastcache.h`
- TLS layers: `hakmem_tiny_tls_list.h`, `hakmem_tiny_tls_ops.h`
- SuperSlab: `superslab_inline.h`, `tiny_superslab_*.inc.h`
- Refill: `hakmem_tiny_refill.inc.h`, `tiny_refill_opt.h`
- Free paths: `tiny_free_magazine.inc.h`, `tiny_superslab_free.inc.h`
- Documentation: Multiple Phase E3 reports
## Remaining Work
None for Box API offset bugs - all structural issues resolved.
Future enhancements (non-critical):
- Periodic `grep -R '*(void**)' core/` to detect direct pointer access violations
- Enforce Box API usage via static analysis
- Document offset rationale in architecture docs
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-13 06:50:20 +09:00
|
|
|
|
void* next = tiny_next_read(class_idx, cur);
|
2025-11-05 12:31:14 +09:00
|
|
|
|
SuperSlab* ss = hak_super_lookup(cur);
|
|
|
|
|
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
|
|
|
|
|
hak_tiny_free_superslab(cur, ss);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
TinySlab* slab = hak_tiny_owner_slab(cur);
|
|
|
|
|
|
if (slab) {
|
|
|
|
|
|
int cls = slab->class_idx;
|
|
|
|
|
|
size_t block_size = g_tiny_class_sizes[cls];
|
|
|
|
|
|
int block_idx = (int)(((uintptr_t)cur - (uintptr_t)slab->base) / block_size);
|
|
|
|
|
|
pthread_mutex_t* lock = &g_tiny_class_locks[cls].m;
|
|
|
|
|
|
pthread_mutex_lock(lock);
|
|
|
|
|
|
if (hak_tiny_is_used(slab, block_idx)) {
|
|
|
|
|
|
hak_tiny_set_free(slab, block_idx);
|
|
|
|
|
|
int was_full = (slab->free_count == 0);
|
|
|
|
|
|
slab->free_count++;
|
|
|
|
|
|
g_tiny_pool.free_count[cls]++;
|
|
|
|
|
|
if (was_full) {
|
|
|
|
|
|
move_to_free_list(cls, slab);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (slab->free_count == slab->total_count) {
|
|
|
|
|
|
TinySlab** headp = &g_tiny_pool.free_slabs[cls];
|
|
|
|
|
|
TinySlab* prev = NULL;
|
|
|
|
|
|
for (TinySlab* s = *headp; s; prev = s, s = s->next) {
|
|
|
|
|
|
if (s == slab) {
|
|
|
|
|
|
if (prev) prev->next = s->next;
|
|
|
|
|
|
else *headp = s->next;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
release_slab(slab);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
pthread_mutex_unlock(lock);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
cur = next;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Release TLS-bound SuperSlab reference when caches are empty
|
|
|
|
|
|
TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
|
|
|
|
|
|
SuperSlab* held_ss = tls_slab->ss;
|
|
|
|
|
|
if (held_ss) {
|
|
|
|
|
|
int keep_binding = 0;
|
|
|
|
|
|
if (tls_slab->meta && tls_slab->meta->used > 0) {
|
|
|
|
|
|
keep_binding = 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
if (!keep_binding) {
|
|
|
|
|
|
tls_slab->ss = NULL;
|
|
|
|
|
|
tls_slab->meta = NULL;
|
|
|
|
|
|
tls_slab->slab_base = NULL;
|
|
|
|
|
|
tls_slab->slab_idx = 0;
|
|
|
|
|
|
superslab_ref_dec(held_ss);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
g_tls_active_slab_a[class_idx] = NULL;
|
|
|
|
|
|
g_tls_active_slab_b[class_idx] = NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void tiny_apply_mem_diet(void) {
|
|
|
|
|
|
g_mag_cap_limit = 64;
|
|
|
|
|
|
for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
|
|
|
|
|
|
if (g_fast_cap[class_idx] > 0) {
|
|
|
|
|
|
uint16_t limit = (class_idx <= 3) ? 48 : 32;
|
|
|
|
|
|
if (limit < 16) limit = 16;
|
|
|
|
|
|
if (g_fast_cap[class_idx] > limit) {
|
|
|
|
|
|
g_fast_cap[class_idx] = limit;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
TinyTLSList* tls = &g_tls_lists[class_idx];
|
|
|
|
|
|
uint32_t new_cap = tls->cap;
|
|
|
|
|
|
if (new_cap > (uint32_t)g_mag_cap_limit) new_cap = (uint32_t)g_mag_cap_limit;
|
|
|
|
|
|
if (new_cap < 16u) new_cap = 16u;
|
|
|
|
|
|
tls->cap = new_cap;
|
|
|
|
|
|
tls->refill_low = tiny_tls_default_refill(new_cap);
|
|
|
|
|
|
tls->spill_high = tiny_tls_default_spill(new_cap);
|
|
|
|
|
|
tiny_tls_publish_targets(class_idx, new_cap);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|