Add comprehensive design docs and research boxes

Design docs:
- docs/analysis/ALLOC_TINY_FAST_DUALHOT_1_DESIGN.md: ALLOC DUALHOT investigation
- docs/analysis/FREE_TINY_FAST_DUALHOT_1_DESIGN.md: FREE DUALHOT final specs
- docs/analysis/FREE_TINY_FAST_HOTCOLD_OPT_1_DESIGN.md: Hot/Cold split research
- docs/analysis/POOL_MID_INUSE_DEFERRED_DN_BATCH_DESIGN.md: Deferred batching design
- docs/analysis/POOL_MID_INUSE_DEFERRED_REGRESSION_ANALYSIS.md: Stats overhead findings
- docs/analysis/MID_DESC_CACHE_BENCHMARK_2025-12-12.md: Cache measurement results
- docs/analysis/LAST_MATCH_CACHE_IMPLEMENTATION.md: TLS cache investigation

Research boxes (SS page table):
- core/box/ss_pt_env_box.h: HAKMEM_SS_LOOKUP_KIND gate (sketched after this message)
- core/box/ss_pt_types_box.h: 2-level page table structures
- core/box/ss_pt_lookup_box.h: ss_pt_lookup() implementation
- core/box/ss_pt_register_box.h: Page table registration (header reproduced below)
- core/box/ss_pt_impl.c: Global definitions

Updates:
- docs/specs/ENV_VARS_COMPLETE.md: HOTCOLD, DEFERRED, SS_LOOKUP env vars
- core/box/hak_free_api.inc.h: FREE-DISPATCH-SSOT integration
- core/box/pool_mid_inuse_deferred_box.h: Deferred API updates
- core/box/pool_mid_inuse_deferred_stats_box.h: Stats collection
- core/hakmem_super_registry: SS page table integration

Current status:
- FREE-TINY-FAST-DUALHOT-1: +13% improvement, ready for adoption
- ALLOC-TINY-FAST-DUALHOT-1: -2% regression, frozen as research box
- Next: optimization roadmap per ROI (mimalloc gap 2.5x)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
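The ss_pt_env_box.h gate listed above selects the SS lookup path at startup via the HAKMEM_SS_LOOKUP_KIND environment variable. The following is a minimal sketch of such a gate, not the actual box: the function and enum names (ss_pt_lookup_kind, SS_LOOKUP_REGISTRY, SS_LOOKUP_PAGETABLE) and the 0/1 value convention are assumptions for illustration only.

    // Sketch only; the real ss_pt_env_box.h may differ.
    // Assumed convention: HAKMEM_SS_LOOKUP_KIND=0 -> registry lookup,
    //                     HAKMEM_SS_LOOKUP_KIND=1 -> 2-level page table.
    #include <stdlib.h>

    enum SsLookupKind { SS_LOOKUP_REGISTRY = 0, SS_LOOKUP_PAGETABLE = 1 };

    static inline int ss_pt_lookup_kind(void) {
        static int cached = -1;                       // resolved once, then reused
        if (cached < 0) {
            const char* v = getenv("HAKMEM_SS_LOOKUP_KIND");
            cached = (v != NULL) ? atoi(v) : SS_LOOKUP_REGISTRY;
        }
        return cached;
    }

A free-path dispatcher would call ss_pt_lookup_kind() once and branch to either the existing super-registry lookup or the page-table lookup sketched further below.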
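The registration header core/box/ss_pt_register_box.h, reproduced below, relies on the 2-level page-table structures from ss_pt_types_box.h, which is not shown in this commit view. The sketch below reconstructs the assumed layout purely from how the register code uses it: the field names g_ss_pt.l2 and entries, the 512KB chunk size, and the 48-bit address limit come from the code; the L1/L2 bit split and the SsPtRoot type name are assumptions.

    // Sketch of the assumed ss_pt_types_box.h layout (not the actual file).
    // A 48-bit address space is divided into 512KB chunks; the chunk number is
    // split into an L1 index (top bits) and an L2 index (bottom bits).
    #include <stdatomic.h>
    #include <stdint.h>

    #define SS_PT_CHUNK_LG  19                        /* 512KB chunks (from the code below) */
    #define SS_PT_L2_BITS   13                        /* assumed split */
    #define SS_PT_L1_BITS   (48 - SS_PT_CHUNK_LG - SS_PT_L2_BITS)

    #define SS_PT_L1_INDEX(p) ((uint32_t)(((uintptr_t)(p)) >> (SS_PT_CHUNK_LG + SS_PT_L2_BITS)))
    #define SS_PT_L2_INDEX(p) ((uint32_t)((((uintptr_t)(p)) >> SS_PT_CHUNK_LG) & ((1u << SS_PT_L2_BITS) - 1)))

    struct SuperSlab;                                  // owner of the registered chunks

    typedef struct SsPtL2 {
        _Atomic(struct SuperSlab*) entries[1u << SS_PT_L2_BITS];
    } SsPtL2;

    typedef struct SsPtRoot {
        _Atomic(SsPtL2*) l2[1u << SS_PT_L1_BITS];      // L2 tables are lazily mmap'd
    } SsPtRoot;

    extern SsPtRoot g_ss_pt;                           // defined in ss_pt_impl.c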
#ifndef SS_PT_REGISTER_BOX_H
#define SS_PT_REGISTER_BOX_H

#include "ss_pt_types_box.h"
#include <stdatomic.h>   // C11 atomics and fixed-width types (may also come via the types box)
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

// Register single 512KB chunk (cold path)
static inline void ss_pt_register_chunk(void* chunk_base, struct SuperSlab* ss) {
    uintptr_t p = (uintptr_t)chunk_base;

    // Out-of-range check: only the canonical 48-bit address space is covered
    if (p >> 48) return;

    uint32_t l1_idx = SS_PT_L1_INDEX(chunk_base);
    uint32_t l2_idx = SS_PT_L2_INDEX(chunk_base);

    // Ensure L2 exists (lazily mmap'd, published via CAS)
    SsPtL2* l2 = atomic_load_explicit(&g_ss_pt.l2[l1_idx], memory_order_acquire);
    if (l2 == NULL) {
        SsPtL2* new_l2 = (SsPtL2*)mmap(NULL, sizeof(SsPtL2),
                                       PROT_READ | PROT_WRITE,
                                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (new_l2 == MAP_FAILED) return;

        SsPtL2* expected = NULL;
        if (!atomic_compare_exchange_strong_explicit(&g_ss_pt.l2[l1_idx],
                &expected, new_l2, memory_order_acq_rel, memory_order_acquire)) {
            // Lost the race: another thread installed an L2; drop ours, use theirs
            munmap(new_l2, sizeof(SsPtL2));
            l2 = expected;
        } else {
            l2 = new_l2;
        }
    }

    // Store SuperSlab pointer (release)
    atomic_store_explicit(&l2->entries[l2_idx], ss, memory_order_release);
}

// Unregister single chunk (NULL store, L2 never freed)
static inline void ss_pt_unregister_chunk(void* chunk_base) {
    uintptr_t p = (uintptr_t)chunk_base;
    if (p >> 48) return;

    uint32_t l1_idx = SS_PT_L1_INDEX(chunk_base);
    uint32_t l2_idx = SS_PT_L2_INDEX(chunk_base);

    SsPtL2* l2 = atomic_load_explicit(&g_ss_pt.l2[l1_idx], memory_order_acquire);
    if (l2) {
        atomic_store_explicit(&l2->entries[l2_idx], NULL, memory_order_release);
    }
}

// Register all chunks of a SuperSlab (1MB = 2 chunks, 2MB = 4 chunks)
static inline void ss_pt_register(struct SuperSlab* ss, void* base, int lg_size) {
    size_t size = (size_t)1 << lg_size;
    size_t chunk_size = (size_t)1 << SS_PT_CHUNK_LG;  // 512KB
    size_t n_chunks = size / chunk_size;

    for (size_t i = 0; i < n_chunks; i++) {
        ss_pt_register_chunk((char*)base + i * chunk_size, ss);
    }
}

// Unregister all chunks of a SuperSlab region
static inline void ss_pt_unregister(void* base, int lg_size) {
    size_t size = (size_t)1 << lg_size;
    size_t chunk_size = (size_t)1 << SS_PT_CHUNK_LG;
    size_t n_chunks = size / chunk_size;

    for (size_t i = 0; i < n_chunks; i++) {
        ss_pt_unregister_chunk((char*)base + i * chunk_size);
    }
}

#endif  // SS_PT_REGISTER_BOX_H
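To show how the registration API above fits into the SuperSlab lifecycle, here is a hedged usage sketch. The helper names superslab_create_2mb/superslab_destroy, the header-at-base layout, and the exact signature of ss_pt_lookup() (declared in ss_pt_lookup_box.h per the commit message) are assumptions, not code from this change.

    // Sketch only: intended register / lookup / unregister lifecycle.
    #include <stddef.h>
    #include <sys/mman.h>

    static struct SuperSlab* superslab_create_2mb(void) {
        int lg_size = 21;                                 // 2MB => 4 x 512KB chunks
        void* base = mmap(NULL, (size_t)1 << lg_size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (base == MAP_FAILED) return NULL;

        struct SuperSlab* ss = (struct SuperSlab*)base;   // header at base (assumption)
        ss_pt_register(ss, base, lg_size);                // every chunk now maps back to ss
        return ss;
    }

    static void superslab_destroy(struct SuperSlab* ss, void* base, int lg_size) {
        (void)ss;
        ss_pt_unregister(base, lg_size);                  // NULL out entries before unmapping
        munmap(base, (size_t)1 << lg_size);
    }

    // On free(), the hot path can resolve the owner without walking the registry:
    //   struct SuperSlab* owner = ss_pt_lookup(ptr);     // from ss_pt_lookup_box.h (assumed)
    //   if (owner) { /* route the free to owner's slab metadata */ }

The design choice worth noting is that L2 tables are installed once and never freed (see ss_pt_unregister_chunk above), so lookups on the free path need only two acquire loads and no locking.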