hakmem/core/box/ss_pt_register_box.h
Moe Charm (CI) d9991f39ff Phase ALLOC-TINY-FAST-DUALHOT-1 & Optimization Roadmap Update
Add comprehensive design docs and research boxes:
- docs/analysis/ALLOC_TINY_FAST_DUALHOT_1_DESIGN.md: ALLOC DUALHOT investigation
- docs/analysis/FREE_TINY_FAST_DUALHOT_1_DESIGN.md: FREE DUALHOT final specs
- docs/analysis/FREE_TINY_FAST_HOTCOLD_OPT_1_DESIGN.md: Hot/Cold split research
- docs/analysis/POOL_MID_INUSE_DEFERRED_DN_BATCH_DESIGN.md: Deferred batching design
- docs/analysis/POOL_MID_INUSE_DEFERRED_REGRESSION_ANALYSIS.md: Stats overhead findings
- docs/analysis/MID_DESC_CACHE_BENCHMARK_2025-12-12.md: Cache measurement results
- docs/analysis/LAST_MATCH_CACHE_IMPLEMENTATION.md: TLS cache investigation

Research boxes (SS page table):
- core/box/ss_pt_env_box.h: HAKMEM_SS_LOOKUP_KIND gate
- core/box/ss_pt_types_box.h: 2-level page table structures (see the sketch after this list)
- core/box/ss_pt_lookup_box.h: ss_pt_lookup() implementation
- core/box/ss_pt_register_box.h: Page table registration
- core/box/ss_pt_impl.c: Global definitions
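
For orientation, the register box shown further down depends on index macros and table types from core/box/ss_pt_types_box.h, which is not included on this page. The following is a minimal sketch of what those definitions can look like; the SS_PT_L1_BITS/SS_PT_L2_BITS names and the 14+15 split of the 29 chunk-index bits (48-bit VA range, 512KB chunks) are assumptions for illustration, not the actual values.

// Sketch only: illustrative stand-in for core/box/ss_pt_types_box.h.
#include <stdatomic.h>
#include <stdint.h>

#define SS_PT_CHUNK_LG  19   // 512KB chunk granularity (matches the register box below)
#define SS_PT_L1_BITS   14   // assumed split of the remaining 29 bits (48 - 19)
#define SS_PT_L2_BITS   15

#define SS_PT_L1_INDEX(p) ((uint32_t)(((uintptr_t)(p)) >> (SS_PT_CHUNK_LG + SS_PT_L2_BITS)))
#define SS_PT_L2_INDEX(p) ((uint32_t)((((uintptr_t)(p)) >> SS_PT_CHUNK_LG) & ((1u << SS_PT_L2_BITS) - 1u)))

struct SuperSlab;                    // owner of the registered 512KB chunks

typedef struct SsPtL2 {              // second level: one slot per 512KB chunk
    _Atomic(struct SuperSlab*) entries[1u << SS_PT_L2_BITS];
} SsPtL2;

typedef struct SsPt {                // first level: lazily mmap'd L2 tables
    _Atomic(SsPtL2*) l2[1u << SS_PT_L1_BITS];
} SsPt;

extern SsPt g_ss_pt;                 // defined in core/box/ss_pt_impl.c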

Updates:
- docs/specs/ENV_VARS_COMPLETE.md: HOTCOLD, DEFERRED, SS_LOOKUP env vars (gate sketched after this list)
- core/box/hak_free_api.inc.h: FREE-DISPATCH-SSOT integration
- core/box/pool_mid_inuse_deferred_box.h: Deferred API updates
- core/box/pool_mid_inuse_deferred_stats_box.h: Stats collection
- core/hakmem_super_registry: SS page table integration
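
The HAKMEM_SS_LOOKUP_KIND gate referenced above selects the lookup path at runtime. Below is a minimal read-once gate sketch of that shape; the helper name ss_pt_lookup_enabled() and the "nonzero enables the page table" convention are assumptions for illustration only (the real accepted values are documented in docs/specs/ENV_VARS_COMPLETE.md).

// Sketch only: illustrative stand-in for the gate in core/box/ss_pt_env_box.h.
#include <stdlib.h>

static inline int ss_pt_lookup_enabled(void) {
    static int g_kind = -1;                  // -1 = not yet read from the environment
    if (g_kind < 0) {
        const char* s = getenv("HAKMEM_SS_LOOKUP_KIND");
        g_kind = s ? atoi(s) : 0;            // default: legacy registry lookup
    }
    return g_kind != 0;                      // benign race: concurrent first calls
                                             // recompute the same value
}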

Current Status:
- FREE-TINY-FAST-DUALHOT-1: +13% improvement, ready for adoption
- ALLOC-TINY-FAST-DUALHOT-1: -2% regression, frozen as research box
- Next: Optimization roadmap per ROI (mimalloc gap 2.5x)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-13 05:35:46 +09:00


#ifndef SS_PT_REGISTER_BOX_H
#define SS_PT_REGISTER_BOX_H

#include "ss_pt_types_box.h"

#include <stdatomic.h>   // atomic loads/stores/CAS (may also come via ss_pt_types_box.h)
#include <stddef.h>      // size_t
#include <stdint.h>      // uintptr_t, uint32_t
#include <sys/mman.h>    // mmap/munmap for lazily allocated L2 tables
// Register single 512KB chunk (cold path: runs once per chunk at SuperSlab creation)
static inline void ss_pt_register_chunk(void* chunk_base, struct SuperSlab* ss) {
    uintptr_t p = (uintptr_t)chunk_base;
    // Addresses above the 48-bit user VA range are not tracked
    if (p >> 48) return;
    uint32_t l1_idx = SS_PT_L1_INDEX(chunk_base);
    uint32_t l2_idx = SS_PT_L2_INDEX(chunk_base);
    // Ensure the L2 table exists (lazily mmap'd, published via CAS)
    SsPtL2* l2 = atomic_load_explicit(&g_ss_pt.l2[l1_idx], memory_order_acquire);
    if (l2 == NULL) {
        SsPtL2* new_l2 = (SsPtL2*)mmap(NULL, sizeof(SsPtL2),
                                       PROT_READ | PROT_WRITE,
                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (new_l2 == MAP_FAILED) return;
        SsPtL2* expected = NULL;
        if (!atomic_compare_exchange_strong_explicit(&g_ss_pt.l2[l1_idx],
                &expected, new_l2, memory_order_acq_rel, memory_order_acquire)) {
            // Lost the race: another thread installed an L2 first; use theirs
            munmap(new_l2, sizeof(SsPtL2));
            l2 = expected;
        } else {
            l2 = new_l2;
        }
    }
    // Store SuperSlab pointer (release) so lookups observe a fully initialized mapping
    atomic_store_explicit(&l2->entries[l2_idx], ss, memory_order_release);
}
// Unregister single chunk (NULL store; the L2 table itself is never freed)
static inline void ss_pt_unregister_chunk(void* chunk_base) {
    uintptr_t p = (uintptr_t)chunk_base;
    if (p >> 48) return;
    uint32_t l1_idx = SS_PT_L1_INDEX(chunk_base);
    uint32_t l2_idx = SS_PT_L2_INDEX(chunk_base);
    SsPtL2* l2 = atomic_load_explicit(&g_ss_pt.l2[l1_idx], memory_order_acquire);
    if (l2) {
        atomic_store_explicit(&l2->entries[l2_idx], NULL, memory_order_release);
    }
}
// Register all chunks of a SuperSlab (1MB = 2 chunks, 2MB = 4 chunks)
static inline void ss_pt_register(struct SuperSlab* ss, void* base, int lg_size) {
    size_t size = (size_t)1 << lg_size;
    size_t chunk_size = (size_t)1 << SS_PT_CHUNK_LG; // 512KB
    size_t n_chunks = size / chunk_size;
    for (size_t i = 0; i < n_chunks; i++) {
        ss_pt_register_chunk((char*)base + i * chunk_size, ss);
    }
}
// Unregister all chunks of a SuperSlab
static inline void ss_pt_unregister(void* base, int lg_size) {
    size_t size = (size_t)1 << lg_size;
    size_t chunk_size = (size_t)1 << SS_PT_CHUNK_LG;
    size_t n_chunks = size / chunk_size;
    for (size_t i = 0; i < n_chunks; i++) {
        ss_pt_unregister_chunk((char*)base + i * chunk_size);
    }
}

#endif // SS_PT_REGISTER_BOX_H
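
To tie the register box to the lookup side, here is a hypothetical usage sketch. The real ss_pt_lookup() lives in core/box/ss_pt_lookup_box.h and is not shown on this page; the body below (named ss_pt_lookup_sketch to avoid confusion) only mirrors the illustrative SsPt/SsPtL2 layout assumed earlier.

// Sketch only: plausible resolution of an interior pointer to its owning SuperSlab.
static inline struct SuperSlab* ss_pt_lookup_sketch(void* p) {
    uintptr_t a = (uintptr_t)p;
    if (a >> 48) return NULL;                               // outside tracked VA range
    SsPtL2* l2 = atomic_load_explicit(&g_ss_pt.l2[SS_PT_L1_INDEX(p)], memory_order_acquire);
    if (!l2) return NULL;
    return atomic_load_explicit(&l2->entries[SS_PT_L2_INDEX(p)], memory_order_acquire);
}

// Typical lifecycle (2MB SuperSlab => lg_size 21 => four 512KB chunks):
//   ss_pt_register(ss, base, 21);
//   struct SuperSlab* owner = ss_pt_lookup_sketch((char*)base + some_offset);  // == ss
//   ...
//   ss_pt_unregister(base, 21);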