hakmem/core/hakmem_tiny_lifecycle.inc
Moe Charm (CI) d5302e9c87 Phase 7 follow-up: header-aware in BG spill, TLS drain, and aggressive inline macros
- bg_spill: link/traverse next at base+1 for C0–C6, at base for C7 (see the sketch below)
- lifecycle: drain TLS SLL and fast caches reading next with header-aware offsets
- tiny_alloc_fast_inline: POP/PUSH macros made header-aware to match tls_sll_box rules
- add optional FREE_WRAP_ENTER trace (HAKMEM_FREE_WRAP_TRACE) for early triage

Result: the 0xa0/…0099 bogus-free logs are gone; the remaining SIGBUS now appears early in the free path. Next: instrument the early libc fallback or guard invalid pointers during init to pinpoint the source.
2025-11-10 18:21:32 +09:00
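The bullets above describe the header-aware freelist link rule used by the spill and drain paths. Below is a minimal sketch of that rule, assuming a one-byte class-index header for classes 0–6 and no header for class 7; the helper names (tiny_next_addr, tiny_sll_push, tiny_sll_pop) are illustrative and not part of the codebase.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    // Illustrative only: byte address of the freelist "next" link inside a tiny block.
    static inline uint8_t* tiny_next_addr(void* block, int class_idx) {
        size_t off = (class_idx == 7) ? 0u : 1u;  // C0-C6 carry a one-byte class header
        return (uint8_t*)block + off;
    }

    // Push onto a TLS singly linked list; memcpy keeps the base+1 store well defined.
    static inline void tiny_sll_push(void** head, void* block, int class_idx) {
        memcpy(tiny_next_addr(block, class_idx), head, sizeof(void*));
        *head = block;
    }

    // Pop the head block and promote its stored "next" link to the new head.
    static inline void* tiny_sll_pop(void** head, int class_idx) {
        void* block = *head;
        if (block) memcpy(head, tiny_next_addr(block, class_idx), sizeof(void*));
        return block;
    }

The hot-path macros and tls_sll_box remain the source of truth; this sketch only restates the base+1 (C0–C6) versus base+0 (C7) convention.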


// hakmem_tiny_lifecycle.inc
// Phase 2D-3: Lifecycle management functions extraction
//
// This file contains lifecycle management functions extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by ~226 lines (16%).
//
// Functions:
// - hak_tiny_trim(): Trim and cleanup operations
// - tiny_tls_cache_drain(): TLS cache draining
// - tiny_apply_mem_diet(): Memory diet mode application
//
// Cold/maintenance path - not performance critical.
#include "tiny_tls_guard.h"
void hak_tiny_trim(void) {
    static _Atomic int g_trim_call_count = 0;
    int call_count = atomic_fetch_add_explicit(&g_trim_call_count, 1, memory_order_relaxed);
    if (call_count < 5) { // First 5 calls only
        fprintf(stderr, "[DEBUG hak_tiny_trim] Call #%d\n", call_count + 1);
    }
    if (!g_tiny_initialized) return;
    // Lazy init for SS reserve env
    if (__builtin_expect(g_empty_reserve == -1, 0)) {
        char* er = getenv("HAKMEM_TINY_SS_RESERVE");
        int v = (er ? atoi(er) : EMPTY_SUPERSLAB_RESERVE);
        if (v < 0) {
            v = 0;
        } else if (v > 4) {
            v = 4; // guardrails
        }
        g_empty_reserve = v;
    }
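    // Usage note: HAKMEM_TINY_SS_RESERVE sets how many empty SuperSlabs are kept
    // per class before extras become releasable; values are clamped to 0..4 above.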
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        tiny_tls_cache_drain(class_idx);
        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        pthread_mutex_lock(lock);
        TinySlab** head = &g_tiny_pool.free_slabs[class_idx];
        TinySlab* prev = NULL;
        TinySlab* slab = *head;
        while (slab) {
            TinySlab* next = slab->next;
            if (slab->free_count == slab->total_count) {
                if (prev) prev->next = next; else *head = next;
                release_slab(slab);
                slab = next;
                continue;
            }
            prev = slab;
            slab = next;
        }
        pthread_mutex_unlock(lock);
    }
    // Optional: attempt SuperSlab reclamation for completely empty SS (conservative)
    static int g_trim_ss_enabled = -1;
    static int g_ss_partial_env = -1;
    if (g_trim_ss_enabled == -1) {
        char* env = getenv("HAKMEM_TINY_TRIM_SS");
        if (env) {
            g_trim_ss_enabled = (atoi(env) != 0) ? 1 : 0;
        } else {
            g_trim_ss_enabled = 1; // default ON for better memory efficiency
        }
    }
    if (g_ss_partial_env == -1) {
        char* env = getenv("HAKMEM_TINY_SS_PARTIAL");
        if (env) {
            g_ss_partial_enable = (atoi(env) != 0) ? 1 : 0;
        }
        char* interval = getenv("HAKMEM_TINY_SS_PARTIAL_INTERVAL");
        if (interval) {
            int v = atoi(interval);
            if (v < 1) v = 1;
            g_ss_partial_interval = (uint32_t)v;
        }
        g_ss_partial_env = 1;
    }
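    // Env knobs read once above:
    //   HAKMEM_TINY_TRIM_SS=0|1           - enable SuperSlab reclamation (default 1)
    //   HAKMEM_TINY_SS_PARTIAL=0|1        - enable partial release for empty SS
    //   HAKMEM_TINY_SS_PARTIAL_INTERVAL=N - partial-release epoch interval (minimum 1)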
    if (!g_trim_ss_enabled) return;
    uint32_t partial_epoch = 0;
    if (g_ss_partial_enable) {
        partial_epoch = atomic_fetch_add_explicit(&g_ss_partial_epoch, 1u, memory_order_relaxed) + 1u;
    }
    // Walk the registry and collect empty SuperSlabs by class
    for (int i = 0; i < SUPER_REG_SIZE; i++) {
        SuperRegEntry* e = &g_super_reg[i];
        uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
        if (base == 0) continue;
        SuperSlab* ss = e->ss;
        if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
        // Only consider completely empty SuperSlabs
        uint32_t active = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
        static _Atomic int g_debug_ss_scan = 0;
        int scan_count = atomic_fetch_add_explicit(&g_debug_ss_scan, 1, memory_order_relaxed);
        if (scan_count < 20) { // First 20 SS scans
            fprintf(stderr, "[DEBUG trim scan] ss=%p class=%d active=%u\n",
                    (void*)ss, ss->size_class, active);
        }
        if (active != 0) continue;
        int k = ss->size_class;
        if (k < 0 || k >= TINY_NUM_CLASSES) continue;
        // Do not free if current thread still caches this SS in TLS
        if (g_tls_slabs[k].ss == ss) continue;
        // Keep up to EMPTY_SUPERSLAB_RESERVE per class as reserve; free extras
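        // Decision outcomes under g_empty_lock:
        //   reserve disabled (0)        -> free immediately when refcount is 0
        //   no reserve held for class   -> park this SS as the reserve (partial release)
        //   this SS is already reserved -> refresh its partial release only
        //   reserve quota not yet met   -> make this SS the new reserve
        //   otherwise                   -> drop the lock and free when refcount is 0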
        pthread_mutex_lock(&g_empty_lock);
        if (g_empty_reserve == 0) {
            pthread_mutex_unlock(&g_empty_lock);
            if (superslab_ref_get(ss) == 0) {
                superslab_free(ss);
            }
            continue;
        }
        if (g_empty_superslabs[k] == NULL) {
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        // If same as reserved, nothing to do
        if (g_empty_superslabs[k] == ss) {
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        int can_free = (g_empty_counts[k] >= g_empty_reserve);
        if (!can_free) {
            // Replace reserve with this newer SS
            g_empty_superslabs[k] = ss;
            g_empty_counts[k] = 1;
            superslab_partial_release(ss, partial_epoch);
            pthread_mutex_unlock(&g_empty_lock);
            continue;
        }
        pthread_mutex_unlock(&g_empty_lock);
        // Free outside of the empty_lock (conservative: only when refcount == 0)
        if (superslab_ref_get(ss) == 0) {
            superslab_free(ss);
        }
    }
}
static void tiny_tls_cache_drain(int class_idx) {
    TinyTLSList* tls = &g_tls_lists[class_idx];
    // Drain TLS SLL cache
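    // Header-aware traversal: for classes 0-6 the next pointer is read at base+1
    // (a one-byte class-index header precedes it); class 7 keeps it at base+0.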
    void* sll = g_tls_sll_head[class_idx];
    g_tls_sll_head[class_idx] = NULL;
    g_tls_sll_count[class_idx] = 0;
    while (sll) {
#if HAKMEM_TINY_HEADER_CLASSIDX
        const size_t next_off_sll = (class_idx == 7) ? 0 : 1;
#else
        const size_t next_off_sll = 0;
#endif
        void* next = *(void**)((uint8_t*)sll + next_off_sll);
        tiny_tls_list_guard_push(class_idx, tls, sll);
        tls_list_push(tls, sll);
        sll = next;
    }
    // Drain fast tier cache
    void* fast = g_fast_head[class_idx];
    g_fast_head[class_idx] = NULL;
    g_fast_count[class_idx] = 0;
    while (fast) {
#if HAKMEM_TINY_HEADER_CLASSIDX
        const size_t next_off_fast = (class_idx == 7) ? 0 : 1;
#else
        const size_t next_off_fast = 0;
#endif
        void* next = *(void**)((uint8_t*)fast + next_off_fast);
        tiny_tls_list_guard_push(class_idx, tls, fast);
        tls_list_push(tls, fast);
        fast = next;
    }
    // Spill TLS list back to owners
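    // Each spilled block is routed back to its owner: SuperSlab-backed blocks go
    // through hak_tiny_free_superslab(); otherwise the legacy TinySlab path clears
    // the bitmap bit under the class lock and releases the slab once fully free.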
    void* head = NULL;
    void* tail = NULL;
    while (1) {
        uint32_t taken = tls_list_bulk_take(tls, 0u, &head, &tail);
        if (taken == 0u || head == NULL) break;
        void* cur = head;
        while (cur) {
            void* next = *(void**)cur;
            SuperSlab* ss = hak_super_lookup(cur);
            if (ss && ss->magic == SUPERSLAB_MAGIC) {
                hak_tiny_free_superslab(cur, ss);
            } else {
                TinySlab* slab = hak_tiny_owner_slab(cur);
                if (slab) {
                    int cls = slab->class_idx;
                    size_t block_size = g_tiny_class_sizes[cls];
                    int block_idx = (int)(((uintptr_t)cur - (uintptr_t)slab->base) / block_size);
                    pthread_mutex_t* lock = &g_tiny_class_locks[cls].m;
                    pthread_mutex_lock(lock);
                    if (hak_tiny_is_used(slab, block_idx)) {
                        hak_tiny_set_free(slab, block_idx);
                        int was_full = (slab->free_count == 0);
                        slab->free_count++;
                        g_tiny_pool.free_count[cls]++;
                        if (was_full) {
                            move_to_free_list(cls, slab);
                        }
                        if (slab->free_count == slab->total_count) {
                            TinySlab** headp = &g_tiny_pool.free_slabs[cls];
                            TinySlab* prev = NULL;
                            for (TinySlab* s = *headp; s; prev = s, s = s->next) {
                                if (s == slab) {
                                    if (prev) prev->next = s->next;
                                    else *headp = s->next;
                                    break;
                                }
                            }
                            release_slab(slab);
                        }
                    }
                    pthread_mutex_unlock(lock);
                }
            }
            cur = next;
        }
    }
    // Release TLS-bound SuperSlab reference when caches are empty
    TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
    SuperSlab* held_ss = tls_slab->ss;
    if (held_ss) {
        int keep_binding = 0;
        if (tls_slab->meta && tls_slab->meta->used > 0) {
            keep_binding = 1;
        }
        if (!keep_binding) {
            tls_slab->ss = NULL;
            tls_slab->meta = NULL;
            tls_slab->slab_base = NULL;
            tls_slab->slab_idx = 0;
            superslab_ref_dec(held_ss);
        }
    }
    g_tls_active_slab_a[class_idx] = NULL;
    g_tls_active_slab_b[class_idx] = NULL;
}
static void tiny_apply_mem_diet(void) {
    g_mag_cap_limit = 64;
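    // Memory-diet policy applied per class below: the fast-cache cap is lowered to
    // 48 (classes 0-3) or 32 (classes 4+), and the TLS list cap is clamped into
    // [16, g_mag_cap_limit] with refill/spill watermarks recomputed to match.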
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        if (g_fast_cap[class_idx] > 0) {
            uint16_t limit = (class_idx <= 3) ? 48 : 32;
            if (limit < 16) limit = 16;
            if (g_fast_cap[class_idx] > limit) {
                g_fast_cap[class_idx] = limit;
            }
        }
        TinyTLSList* tls = &g_tls_lists[class_idx];
        uint32_t new_cap = tls->cap;
        if (new_cap > (uint32_t)g_mag_cap_limit) new_cap = (uint32_t)g_mag_cap_limit;
        if (new_cap < 16u) new_cap = 16u;
        tls->cap = new_cap;
        tls->refill_low = tiny_tls_default_refill(new_cap);
        tls->spill_high = tiny_tls_default_spill(new_cap);
        tiny_tls_publish_targets(class_idx, new_cap);
    }
}