Files
hakmem/core/hakmem_tiny_magazine.c
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

165 lines
5.9 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "hakmem_tiny_magazine.h"
#include "hakmem_tiny_config.h" // Centralized configuration
#include "hakmem_tiny.h" // For TINY_NUM_CLASSES
#include "hakmem_tiny_superslab.h"
#include "hakmem_super_registry.h" // Phase 1: For hak_super_lookup()
#include "tiny_remote.h"
#include "hakmem_prof.h"
#include "hakmem_internal.h"
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
#include "box/tiny_mem_stats_box.h"
#include <pthread.h>
// Return the calling thread's pthread_self() value narrowed to 32 bits.
// The result is passed to the remote guard as the "self" thread id.
static inline uint32_t tiny_self_u32_guard(void) {
    const uintptr_t self_bits = (uintptr_t)pthread_self();
    return (uint32_t)self_bits;
}
// Decrement ss->total_active_blocks by one without ever going below zero.
// A NULL SuperSlab is ignored. The counter is updated with a relaxed
// compare-exchange loop: if it is (or becomes) zero, nothing is written.
static inline void superslab_dec_active_safe(SuperSlab* ss) {
    if (!ss) {
        return;
    }

    uint32_t seen = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
    for (;;) {
        if (seen == 0u) {
            return; // already zero: never underflow
        }
        // On failure the CAS refreshes `seen` with the current value and we retry.
        if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks,
                                                  &seen,
                                                  seen - 1u,
                                                  memory_order_relaxed,
                                                  memory_order_relaxed)) {
            return;
        }
    }
}
// Per-thread magazine array, one TinyTLSMag per size class.
// A magazine whose cap is still 0 is treated as "not yet initialized" by the
// init functions in this file.
__thread TinyTLSMag g_tls_mags[TINY_NUM_CLASSES] = {0};
// Global cap limiter (can be reduced via env HAKMEM_TINY_MAG_CAP)
// Applied as an upper bound when a magazine's cap is computed at init time.
int g_mag_cap_limit = TINY_TLS_MAG_CAP;
// Normal-path per-class overrides (env tunables)
// A value > 0 takes precedence over tiny_default_cap() in tiny_effective_cap();
// entries are also written by hkm_ace_set_tls_capacity().
int g_mag_cap_override[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_MAG_CAP_C{0..7}
// Per-thread flag: set to 1 once tiny_small_mags_init_once() has run on this thread.
__thread int g_tls_small_mags_inited = 0;
// Per-thread flag: set to 1 once sizeof(g_tls_mags) has been reported to the
// memory-stats box by tiny_mag_record_mem_once().
static __thread int g_tls_mag_mem_recorded = 0;
// Report the size of this thread's magazine array (sizeof(g_tls_mags)) to the
// memory-stats box. Only the first call on each thread reports; later calls
// return immediately.
static inline void tiny_mag_record_mem_once(void) {
    if (g_tls_mag_mem_recorded) {
        return;
    }

    const ssize_t tls_bytes = (ssize_t)sizeof(g_tls_mags);
    tiny_mem_stats_add_tls_magazine(tls_bytes);
    g_tls_mag_mem_recorded = 1;
}
// tiny_default_cap() and tiny_cap_max_for_class() are now defined as inline
// functions in hakmem_tiny_config.h for centralized configuration.
// Resolve the magazine capacity to use for a size class.
//
// class_idx: size class index; used directly as an index into
//            g_mag_cap_override, so the caller must pass a valid class.
// Returns the per-class override when it is positive, otherwise the
// centrally configured default from tiny_default_cap().
int tiny_effective_cap(int class_idx) {
    const int override_cap = g_mag_cap_override[class_idx];
    return (override_cap > 0) ? override_cap : tiny_default_cap(class_idx);
}
// Initialize the calling thread's magazines for classes 0..3, once per thread.
//
// Magazines whose cap is already non-zero are left untouched. For the others
// the cap is the effective cap clamped to TINY_TLS_MAG_CAP and then to
// g_mag_cap_limit, and top is reset to 0.
void tiny_small_mags_init_once(void) {
    if (__builtin_expect(g_tls_small_mags_inited, 1)) return;

    tiny_mag_record_mem_once();

    for (int cls = 0; cls < 4; ++cls) {
        TinyTLSMag* mag = &g_tls_mags[cls];
        if (mag->cap != 0) {
            continue; // already set up on this thread
        }

        int cap = tiny_effective_cap(cls);
        if (cap > TINY_TLS_MAG_CAP) {
            cap = TINY_TLS_MAG_CAP;
        }
        if (cap > g_mag_cap_limit) {
            cap = g_mag_cap_limit;
        }

        mag->cap = cap;
        mag->top = 0;
    }

    g_tls_small_mags_inited = 1;
}
// Lazily initialize the calling thread's magazine for one size class.
//
// class_idx: size class index; used directly as an index into g_tls_mags,
//            so the caller must pass a valid class.
// Does nothing when the magazine's cap is already non-zero. Otherwise records
// the TLS memory footprint (once per thread), sets cap to the effective cap
// clamped to TINY_TLS_MAG_CAP and g_mag_cap_limit, and resets top to 0.
void tiny_mag_init_if_needed(int class_idx) {
    TinyTLSMag* slot = &g_tls_mags[class_idx];
    if (slot->cap != 0) {
        return;
    }

    tiny_mag_record_mem_once();

    int cap = tiny_effective_cap(class_idx);
    if (cap > TINY_TLS_MAG_CAP) {
        cap = TINY_TLS_MAG_CAP;
    }
    if (cap > g_mag_cap_limit) {
        cap = g_mag_cap_limit;
    }

    slot->cap = cap;
    slot->top = 0;
}
// ============================================================================
// ACE Learning Layer: Runtime TLS Capacity Adjustment
// ============================================================================
// Set the per-class magazine capacity override at runtime.
//
// class_idx: size class; must be in [0, TINY_NUM_CLASSES).
// capacity:  requested capacity; must be in [16, tiny_cap_max_for_class(class_idx)].
// Requests that fail either check are silently ignored.
//
// Only g_mag_cap_override is written here, so the value is picked up by
// thread-local magazines on their next init.
// Note: the lazy sync implementation lives in hakmem_tiny_magazine.h (inlined).
void hkm_ace_set_tls_capacity(int class_idx, uint32_t capacity) {
    const int class_in_range = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);
    if (!class_in_range) {
        return;
    }

    // The short-circuit keeps the max lookup from running for too-small requests.
    if (capacity >= 16 && capacity <= (uint32_t)tiny_cap_max_for_class(class_idx)) {
        g_mag_cap_override[class_idx] = (int)capacity;
    }
}
// ============================================================================
// Phase 7.7: Magazine Flush API
// ============================================================================
// Flush the calling thread's Magazine cache for a specific size class.
//
// Every cached block is popped from the thread-local magazine and returned to
// the freelist of the SuperSlab slab that owns it, which enables empty
// SuperSlab detection and deallocation.
//
// class_idx: size class to flush; values outside [0, TINY_NUM_CLASSES) are
//            ignored.
//
// The per-class lock g_tiny_class_locks[class_idx].m is held for the whole
// drain, and the operation is reported to the profiler as HKP_TINY_SPILL.
// Items for which no valid owning SuperSlab is found (lookup returns NULL or
// the magic does not match) are only removed from the magazine here.
void hak_tiny_magazine_flush(int class_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return;

    // Initialize if needed
    tiny_mag_init_if_needed(class_idx);
    TinyTLSMag* mag = &g_tls_mags[class_idx];
    if (mag->top == 0) return; // Nothing to flush

    // Lock and flush entire Magazine to freelist
    pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
    struct timespec tss; int ss_time = hkm_prof_begin(&tss);
    (void)ss_time; (void)tss;
    pthread_mutex_lock(lock);

    // Flush ALL blocks (not just half like normal spill)
    int flush_count = mag->top;
    uint32_t self_tid = tiny_self_u32_guard();
    for (int i = 0; i < flush_count; i++) {
        TinyMagItem it = mag->items[--mag->top];

        // Return to SuperSlab freelist
        SuperSlab* owner_ss = hak_super_lookup(it.ptr);
        if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
            // NOTE(review): slab_idx is used unchecked as an array index;
            // confirm slab_index_for() cannot return an out-of-range value here.
            int slab_idx = slab_index_for(owner_ss, it.ptr);
            TinySlabMeta* meta = &owner_ss->slabs[slab_idx];

            if (!tiny_remote_guard_allow_local_push(owner_ss, slab_idx, meta, it.ptr, "mag_flush", self_tid)) {
                // The guard refused a local push: hand the block to
                // ss_remote_push() instead of touching the local freelist.
                (void)ss_remote_push(owner_ss, slab_idx, it.ptr);
                if (meta->used > 0) meta->used--;
                continue;
            }

            // Fall back to the flushed class when the slab's recorded class
            // index is out of range.
            uint8_t cls = (meta->class_idx < TINY_NUM_CLASSES) ? meta->class_idx : (uint8_t)class_idx;
            tiny_next_write(cls, it.ptr, meta->freelist);
            meta->freelist = it.ptr;
            // Guard the decrement exactly like the remote path above so an
            // already-zero counter cannot underflow/wrap.
            if (meta->used > 0) meta->used--;
            // Active was decremented at free time
            // Empty detection and release are delegated to the flush-family
            // APIs (kept off the hot path).
        }
    }

    pthread_mutex_unlock(lock);
    hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
}
// Flush the Magazine cache of every size class, then call hak_tiny_trim().
// Call this when memory needs to be released (e.g., before measuring RSS).
void hak_tiny_magazine_flush_all(void) {
    int cls = 0;
    while (cls < TINY_NUM_CLASSES) {
        hak_tiny_magazine_flush(cls);
        ++cls;
    }

    hak_tiny_trim();
}