Incremental improvements: mid_desc cache, pool hotpath optimization, and doc updates

**Changes:**
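- core/box/pool_api.inc.h: Add opt-in mid_desc TLS lookup cache (HAKMEM_MID_DESC_CACHE_ENABLED, default OFF) and route the pool free/lookup paths through mid_desc_lookup_cached(); code organization and micro-optimizations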
- CURRENT_TASK.md: Updated Phase MD1 (mid_desc TLS cache: +3.2% for C6-heavy)
- docs/analysis files: Various analysis and documentation updates
- AGENTS.md: Agent role clarifications
- TINY_FRONT_V3_FLATTENING_GUIDE.md: Flattening strategy documentation

**Verification:**
- random_mixed_hakmem: 44.8M ops/s (1M iterations, 400 working set)
- No segfaults or assertions across all benchmark variants
- Stable performance across multiple runs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-10 14:00:57 +09:00
parent 0e5a2634bc
commit 406a2f4d26
9 changed files with 100 additions and 9 deletions

View File

@ -6,6 +6,7 @@
#include "box/pool_hotbox_v2_box.h"
#include "box/tiny_heap_env_box.h" // TinyHeap profile (C7_SAFE では flatten を無効化)
#include "box/pool_zero_mode_box.h" // Pool zeroing policy (env cached)
#include <stdint.h>
// Pool v2 is experimental. Default OFF (use legacy v1 path).
static inline int hak_pool_v2_enabled(void) {
@ -63,6 +64,37 @@ static inline int hak_pool_v1_flatten_stats_enabled(void) {
return g;
}
// Opt-in switch for the mid-desc lookup TLS cache (intended for mid benchmarks; OFF by default).
// Reads HAKMEM_MID_DESC_CACHE_ENABLED once and memoises the answer in a function-local static:
// unset, empty, or a value starting with '0' means disabled (0); anything else means enabled (1).
static inline int hak_mid_desc_cache_enabled(void) {
    static int cached_flag = -1;  // -1 = environment not consulted yet
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }
    const char* env = getenv("HAKMEM_MID_DESC_CACHE_ENABLED");
    int enabled = 0;
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        enabled = 1;
    }
    cached_flag = enabled;
    return cached_flag;
}
// Single-entry cache for mid descriptor lookups: remembers the page base address of the
// most recent successful lookup together with the descriptor that was returned for it.
typedef struct MidDescCache {
// Page-aligned base address (address masked down by POOL_PAGE_SIZE) of the cached entry.
void* last_page;
// Descriptor stored for last_page; stays NULL until a lookup succeeds.
MidPageDesc* last_desc;
} MidDescCache;
// One instance per thread (__thread), zero-initialised so the first cached lookup always misses.
static __thread MidDescCache g_mid_desc_cache = {0};
static inline MidPageDesc* mid_desc_lookup_cached(void* addr) {
if (!hak_mid_desc_cache_enabled()) return mid_desc_lookup(addr);
void* page = (void*)((uintptr_t)addr & ~((uintptr_t)POOL_PAGE_SIZE - 1));
if (g_mid_desc_cache.last_desc && g_mid_desc_cache.last_page == page) {
return g_mid_desc_cache.last_desc;
}
MidPageDesc* d = mid_desc_lookup(addr);
if (d) {
g_mid_desc_cache.last_page = page;
g_mid_desc_cache.last_desc = d;
}
return d;
}
typedef struct PoolV1FlattenStats {
_Atomic uint64_t alloc_tls_hit;
@ -422,7 +454,7 @@ static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_
void* raw = (char*)ptr - HEADER_SIZE;
AllocHeader* hdr = (AllocHeader*)raw;
MidPageDesc* d_desc = mid_desc_lookup(ptr);
MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
int mid_by_desc = d_desc != NULL;
if (!mid_by_desc && g_hdr_light_enabled < 2) {
if (hdr->magic != HAKMEM_MAGIC) { MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC); return; }
@ -490,7 +522,7 @@ static inline void hak_pool_free_v2_impl(void* ptr, size_t size, uintptr_t site_
// Mid-pool membership query (v2 path).
// Returns 1 if ptr resolves to a mid page with a valid class, storing g_class_sizes[class]
// into *out_size when out_size is non-NULL; returns 0 otherwise.
// Resolution order: the MF2 page map is consulted first (only when g_mf2_enabled); if it yields
// no page, the descriptor lookup is used. A class index outside [0, POOL_NUM_CLASSES) or a
// zero entry in g_class_sizes is reported as "not found" (0).
// NOTE(review): the two consecutive `MidPageDesc* d = ...` lines appear to be the before/after
// sides of this diff hunk (mid_desc_lookup -> mid_desc_lookup_cached), not two live statements.
static inline int hak_pool_mid_lookup_v2_impl(void* ptr, size_t* out_size) {
if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { int c = (int)page->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } }
MidPageDesc* d = mid_desc_lookup(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
MidPageDesc* d = mid_desc_lookup_cached(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
}
static inline void hak_pool_free_fast_v2_impl(void* ptr, uintptr_t site_id) {
@ -499,7 +531,7 @@ static inline void hak_pool_free_fast_v2_impl(void* ptr, uintptr_t site_id) {
MidPage* page = mf2_addr_to_page(ptr);
if (page) { mf2_free(ptr); return; }
}
MidPageDesc* d = mid_desc_lookup(ptr);
MidPageDesc* d = mid_desc_lookup_cached(ptr);
if (!d) return;
size_t sz = g_class_sizes[(int)d->class_idx];
if (sz == 0) return;
@ -803,7 +835,7 @@ static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_
void* raw = (char*)ptr - HEADER_SIZE;
AllocHeader* hdr = (AllocHeader*)raw;
int mid_by_desc = 0; MidPageDesc* d_desc = mid_desc_lookup(ptr);
int mid_by_desc = 0; MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
if (d_desc) mid_by_desc = 1;
if (!mid_by_desc && g_hdr_light_enabled < 2) {
if (hdr->magic != HAKMEM_MAGIC) { MF2_ERROR_LOG("Invalid magic 0x%X in pool_free, expected 0x%X", hdr->magic, HAKMEM_MAGIC); return; }
@ -814,7 +846,7 @@ static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_
PoolBlock* block = (PoolBlock*)raw;
if (g_pool.tls_free_enabled) {
int same_thread = 0;
if (g_hdr_light_enabled >= 1) { MidPageDesc* d = mid_desc_lookup(raw); if (d && d->owner_tid != 0 && d->owner_tid == (uint64_t)(uintptr_t)pthread_self()) { same_thread = 1; } }
if (g_hdr_light_enabled >= 1) { MidPageDesc* d = mid_desc_lookup_cached(raw); if (d && d->owner_tid != 0 && d->owner_tid == (uint64_t)(uintptr_t)pthread_self()) { same_thread = 1; } }
else if (hdr->owner_tid != 0 && hdr->owner_tid == (uintptr_t)(uintptr_t)pthread_self()) { same_thread = 1; }
if (same_thread) {
PoolTLSRing* ring = &g_tls_bin[class_idx].ring;
@ -846,7 +878,7 @@ static inline void hak_pool_free_v1_impl(void* ptr, size_t size, uintptr_t site_
}
}
} else {
if (g_tc_enabled) { uint64_t owner_tid = 0; if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid; if (owner_tid == 0) { MidPageDesc* d = mid_desc_lookup(raw); if (d) owner_tid = d->owner_tid; } if (owner_tid != 0) { MidTC* otc = mid_tc_lookup_by_tid(owner_tid); if (otc) { mid_tc_push(otc, class_idx, block); return; } } }
if (g_tc_enabled) { uint64_t owner_tid = 0; if (g_hdr_light_enabled < 2) owner_tid = hdr->owner_tid; if (owner_tid == 0) { MidPageDesc* d = mid_desc_lookup_cached(raw); if (d) owner_tid = d->owner_tid; } if (owner_tid != 0) { MidTC* otc = mid_tc_lookup_by_tid(owner_tid); if (otc) { mid_tc_push(otc, class_idx, block); return; } } }
int shard = hak_pool_get_shard_index(site_id); uintptr_t old_head; HKM_TIME_START(t_remote_push2);
do { old_head = atomic_load_explicit(&g_pool.remote_head[class_idx][shard], memory_order_acquire); block->next = (PoolBlock*)old_head; } while (!atomic_compare_exchange_weak_explicit(&g_pool.remote_head[class_idx][shard], &old_head, (uintptr_t)block, memory_order_release, memory_order_relaxed));
atomic_fetch_add_explicit(&g_pool.remote_count[class_idx][shard], 1, memory_order_relaxed); HKM_TIME_END(HKM_CAT_POOL_REMOTE_PUSH, t_remote_push2); set_nonempty_bit(class_idx, shard);
@ -898,7 +930,7 @@ static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_
if (!hak_pool_is_poolable(size)) return;
void* raw = (char*)ptr - HEADER_SIZE;
MidPageDesc* d_desc = mid_desc_lookup(ptr);
MidPageDesc* d_desc = mid_desc_lookup_cached(ptr);
if (!d_desc) {
if (hak_pool_v1_flatten_stats_enabled()) {
atomic_fetch_add_explicit(&g_pool_v1_flat_stats.free_fallback_v1, 1, memory_order_relaxed);
@ -965,7 +997,7 @@ static inline void hak_pool_free_v1_flat(void* ptr, size_t size, uintptr_t site_
// Mid-pool membership query (v1 path).
// Returns 1 if ptr resolves to a mid page with a valid class, storing g_class_sizes[class]
// into *out_size when out_size is non-NULL; returns 0 otherwise.
// Resolution order: the MF2 page map is consulted first (only when g_mf2_enabled); if it yields
// no page, the descriptor lookup is used. A class index outside [0, POOL_NUM_CLASSES) or a
// zero entry in g_class_sizes is reported as "not found" (0).
// NOTE(review): the two consecutive `MidPageDesc* d = ...` lines appear to be the before/after
// sides of this diff hunk (mid_desc_lookup -> mid_desc_lookup_cached), not two live statements.
static inline int hak_pool_mid_lookup_v1_impl(void* ptr, size_t* out_size) {
if (g_mf2_enabled) { MidPage* page = mf2_addr_to_page(ptr); if (page) { int c = (int)page->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1; } }
MidPageDesc* d = mid_desc_lookup(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
MidPageDesc* d = mid_desc_lookup_cached(ptr); if (!d) return 0; int c = (int)d->class_idx; if (c < 0 || c >= POOL_NUM_CLASSES) return 0; size_t sz = g_class_sizes[c]; if (sz == 0) return 0; if (out_size) *out_size = sz; return 1;
}
static inline void hak_pool_free_fast_v1_impl(void* ptr, uintptr_t site_id) {
@ -974,7 +1006,7 @@ static inline void hak_pool_free_fast_v1_impl(void* ptr, uintptr_t site_id) {
MidPage* page = mf2_addr_to_page(ptr);
if (page) { mf2_free(ptr); return; }
}
MidPageDesc* d = mid_desc_lookup(ptr);
MidPageDesc* d = mid_desc_lookup_cached(ptr);
if (!d) return;
size_t sz = g_class_sizes[(int)d->class_idx];
if (sz == 0) return;