Files
hakmem/core/hakmem_tiny_lifecycle.inc
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

245 lines
8.8 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem_tiny_lifecycle.inc
// Phase 2D-3: Lifecycle management functions extraction
//
// This file contains lifecycle management functions extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by ~226 lines (16%).
//
// Functions:
// - hak_tiny_trim(): Trim and cleanup operations
// - tiny_tls_cache_drain(): TLS cache draining
// - tiny_apply_mem_diet(): Memory diet mode application
//
// Cold/maintenance path - not performance critical.
#include "tiny_tls_guard.h"
/*
 * hak_tiny_trim - release memory the tiny allocator is no longer using.
 *
 * Steps, in order:
 *   1. Return immediately when g_tiny_initialized is not set.
 *   2. While g_empty_reserve is still -1, read HAKMEM_TINY_SS_RESERVE
 *      (falling back to EMPTY_SUPERSLAB_RESERVE) and clamp it to [0, 4].
 *   3. For every size class: call tiny_tls_cache_drain(), then, holding the
 *      class lock, unlink and release_slab() every slab on the class free
 *      list whose free_count equals its total_count.
 *   4. Unless HAKMEM_TINY_TRIM_SS parses to 0 (unset means enabled), walk
 *      g_super_reg. Each SuperSlab with zero active blocks is either kept in
 *      the per-class g_empty_superslabs slot (and handed to
 *      superslab_partial_release) or, if not kept, freed when
 *      superslab_ref_get() reports 0.
 *
 * HAKMEM_TINY_SS_PARTIAL and HAKMEM_TINY_SS_PARTIAL_INTERVAL are read the
 * first time step 4's setup runs and update g_ss_partial_enable /
 * g_ss_partial_interval (interval is forced to be at least 1).
 */
void hak_tiny_trim(void) {
    if (!g_tiny_initialized) return;

    // Lazy init for SS reserve env. The hint covers the whole comparison:
    // once a value in [0, 4] is stored below, this branch is not taken again.
    if (__builtin_expect(g_empty_reserve == -1, 0)) {
        char* er = getenv("HAKMEM_TINY_SS_RESERVE");
        int v = (er ? atoi(er) : EMPTY_SUPERSLAB_RESERVE);
        if (v < 0) {
            v = 0;
        } else if (v > 4) {
            v = 4; // guardrails
        }
        g_empty_reserve = v;
    }

    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        tiny_tls_cache_drain(class_idx);

        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        pthread_mutex_lock(lock);
        TinySlab** head = &g_tiny_pool.free_slabs[class_idx];
        TinySlab* prev = NULL;
        TinySlab* slab = *head;
        while (slab) {
            // Capture the successor first: slab may be released below.
            TinySlab* next = slab->next;
            if (slab->free_count == slab->total_count) {
                // Every block is free: unlink from the list and release.
                if (prev) {
                    prev->next = next;
                } else {
                    *head = next;
                }
                release_slab(slab);
            } else {
                prev = slab;
            }
            slab = next;
        }
        pthread_mutex_unlock(lock);
    }

    // Optional: attempt SuperSlab reclamation for completely empty SS (conservative)
    static int g_trim_ss_enabled = -1;
    static int g_ss_partial_env = -1;
    if (g_trim_ss_enabled == -1) {
        char* env = getenv("HAKMEM_TINY_TRIM_SS");
        // default ON (env unset) for better memory efficiency
        g_trim_ss_enabled = env ? (atoi(env) != 0) : 1;
    }
    if (g_ss_partial_env == -1) {
        char* env = getenv("HAKMEM_TINY_SS_PARTIAL");
        if (env) {
            g_ss_partial_enable = (atoi(env) != 0) ? 1 : 0;
        }
        char* interval = getenv("HAKMEM_TINY_SS_PARTIAL_INTERVAL");
        if (interval) {
            int v = atoi(interval);
            if (v < 1) v = 1;
            g_ss_partial_interval = (uint32_t)v;
        }
        g_ss_partial_env = 1;
    }
    if (!g_trim_ss_enabled) return;

    // The epoch passed to superslab_partial_release stays 0 unless partial
    // release is enabled, in which case it is the post-increment counter value.
    uint32_t partial_epoch = 0;
    if (g_ss_partial_enable) {
        partial_epoch = atomic_fetch_add_explicit(&g_ss_partial_epoch, 1u, memory_order_relaxed) + 1u;
    }

    // Walk the registry and handle empty SuperSlabs by class
    for (int i = 0; i < SUPER_REG_SIZE; i++) {
        SuperRegEntry* e = &g_super_reg[i];
        uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
        if (base == 0) continue;

        SuperSlab* ss = e->ss;
        if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
        // Only consider completely empty SuperSlabs
        if (ss->total_active_blocks != 0) continue;

        int k = ss->size_class;
        if (k < 0 || k >= TINY_NUM_CLASSES) continue;
        // Do not free if current thread still caches this SS in TLS
        if (g_tls_slabs[k].ss == ss) continue;

        // Decide under g_empty_lock whether this SS is kept as the class
        // reserve. With a reserve of 0 nothing is ever kept.
        int keep = 0;
        pthread_mutex_lock(&g_empty_lock);
        if (g_empty_reserve != 0) {
            if (g_empty_superslabs[k] == ss) {
                // Already the recorded reserve for this class.
                keep = 1;
            } else if (g_empty_superslabs[k] == NULL ||
                       g_empty_counts[k] < g_empty_reserve) {
                // Slot is empty, or the count is below the reserve limit:
                // record this SS as the reserve.
                g_empty_superslabs[k] = ss;
                g_empty_counts[k] = 1;
                keep = 1;
            }
            if (keep) {
                superslab_partial_release(ss, partial_epoch);
            }
        }
        pthread_mutex_unlock(&g_empty_lock);
        if (keep) continue;

        // Free outside of g_empty_lock (conservative: only when refcount == 0)
        if (superslab_ref_get(ss) == 0) {
            superslab_free(ss);
        }
    }
}
static void tiny_tls_cache_drain(int class_idx) {
TinyTLSList* tls = &g_tls_lists[class_idx];
// Drain TLS SLL cache
void* sll = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = NULL;
g_tls_sll_count[class_idx] = 0;
while (sll) {
void* next = *(void**)sll;
tiny_tls_list_guard_push(class_idx, tls, sll);
tls_list_push(tls, sll);
sll = next;
}
// Drain fast tier cache
void* fast = g_fast_head[class_idx];
g_fast_head[class_idx] = NULL;
g_fast_count[class_idx] = 0;
while (fast) {
void* next = *(void**)fast;
tiny_tls_list_guard_push(class_idx, tls, fast);
tls_list_push(tls, fast);
fast = next;
}
// Spill TLS list back to owners
void* head = NULL;
void* tail = NULL;
while (1) {
uint32_t taken = tls_list_bulk_take(tls, 0u, &head, &tail);
if (taken == 0u || head == NULL) break;
void* cur = head;
while (cur) {
void* next = *(void**)cur;
SuperSlab* ss = hak_super_lookup(cur);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
hak_tiny_free_superslab(cur, ss);
} else {
TinySlab* slab = hak_tiny_owner_slab(cur);
if (slab) {
int cls = slab->class_idx;
size_t block_size = g_tiny_class_sizes[cls];
int block_idx = (int)(((uintptr_t)cur - (uintptr_t)slab->base) / block_size);
pthread_mutex_t* lock = &g_tiny_class_locks[cls].m;
pthread_mutex_lock(lock);
if (hak_tiny_is_used(slab, block_idx)) {
hak_tiny_set_free(slab, block_idx);
int was_full = (slab->free_count == 0);
slab->free_count++;
g_tiny_pool.free_count[cls]++;
if (was_full) {
move_to_free_list(cls, slab);
}
if (slab->free_count == slab->total_count) {
TinySlab** headp = &g_tiny_pool.free_slabs[cls];
TinySlab* prev = NULL;
for (TinySlab* s = *headp; s; prev = s, s = s->next) {
if (s == slab) {
if (prev) prev->next = s->next;
else *headp = s->next;
break;
}
}
release_slab(slab);
}
}
pthread_mutex_unlock(lock);
}
}
cur = next;
}
}
// Release TLS-bound SuperSlab reference when caches are empty
TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
SuperSlab* held_ss = tls_slab->ss;
if (held_ss) {
int keep_binding = 0;
if (tls_slab->meta && tls_slab->meta->used > 0) {
keep_binding = 1;
}
if (!keep_binding) {
tls_slab->ss = NULL;
tls_slab->meta = NULL;
tls_slab->slab_base = NULL;
tls_slab->slab_idx = 0;
superslab_ref_dec(held_ss);
}
}
g_tls_active_slab_a[class_idx] = NULL;
g_tls_active_slab_b[class_idx] = NULL;
}
/*
 * tiny_apply_mem_diet - shrink per-class cache capacities.
 *
 * Effects, per the assignments below:
 *   - g_mag_cap_limit is set to 64.
 *   - For each class whose g_fast_cap is non-zero, the cap is lowered to at
 *     most 48 (classes 0..3) or 32 (all other classes); it is never raised.
 *   - Each class's TinyTLSList cap is clamped to [16, g_mag_cap_limit], its
 *     refill_low / spill_high are recomputed from the new cap with
 *     tiny_tls_default_refill() / tiny_tls_default_spill(), and the new cap
 *     is passed to tiny_tls_publish_targets().
 */
static void tiny_apply_mem_diet(void) {
    g_mag_cap_limit = 64;

    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        // Fast-tier cap: only tighten an already-enabled (non-zero) cap.
        // Both limits are well above 16, so no lower-bound clamp is needed.
        if (g_fast_cap[class_idx] > 0) {
            uint16_t limit = (class_idx <= 3) ? 48 : 32;
            if (g_fast_cap[class_idx] > limit) {
                g_fast_cap[class_idx] = limit;
            }
        }

        // TLS list cap: apply the upper bound first, then the floor of 16.
        TinyTLSList* tls = &g_tls_lists[class_idx];
        uint32_t new_cap = tls->cap;
        if (new_cap > (uint32_t)g_mag_cap_limit) new_cap = (uint32_t)g_mag_cap_limit;
        if (new_cap < 16u) new_cap = 16u;

        tls->cap = new_cap;
        tls->refill_low = tiny_tls_default_refill(new_cap);
        tls->spill_high = tiny_tls_default_spill(new_cap);
        tiny_tls_publish_targets(class_idx, new_cap);
    }
}