Refactor: Split hakmem_tiny_superslab.c + unified backend exit point

Major refactoring to improve maintainability and debugging:

1. Split hakmem_tiny_superslab.c (1521 lines) into 7 focused files:
   - superslab_allocate.c: SuperSlab allocation/deallocation
   - superslab_backend.c: Backend allocation paths (legacy, shared)
   - superslab_ace.c: ACE (Adaptive Cache Engine) logic
   - superslab_slab.c: Slab initialization and bitmap management
   - superslab_cache.c: LRU cache and prewarm cache management
   - superslab_head.c: SuperSlabHead management and expansion
   - superslab_stats.c: Statistics tracking and debugging

2. Created hakmem_tiny_superslab_internal.h for shared declarations

3. Added superslab_return_block() as the single exit point for header writing:
   - All backend allocations now go through this helper
   - Prevents bugs where headers are written in some paths but forgotten in others
   - Makes future debugging easier (see the sketch below)

4. Updated Makefile for new file structure

5. Added header writing to ss_legacy_backend_box.c and
   ss_unified_backend_box.c (though not currently linked)
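
As a minimal sketch of the pattern (condensed for illustration; the actual
helper is defined in superslab_backend.c below):

    /* Before: duplicated in every backend allocation path */
    #if HAKMEM_TINY_HEADER_CLASSIDX
        return tiny_region_id_write_header(base, class_idx);
    #else
        return (void*)base;
    #endif

    /* After: every path funnels through one exit point */
    return superslab_return_block(base, class_idx);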

Note: the header corruption bug in the Larson benchmark still exists.
Class 1-6 allocations go through TLS refill/carve paths, not the backend,
so they bypass the new single exit point. Further investigation needed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Moe Charm (CI)
2025-11-29 05:13:04 +09:00
parent b52e1985e6
commit 6ac6f5ae1b
13 changed files with 1734 additions and 1542 deletions

Makefile

@@ -191,12 +191,12 @@ LDFLAGS += $(EXTRA_LDFLAGS)
# Targets
TARGET = test_hakmem
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o
OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o test_hakmem.o
OBJS = $(OBJS_BASE)
# Shared library
SHARED_LIB = libhakmem.so
SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o hakmem_tiny_superslab_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o
SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o superslab_allocate_shared.o superslab_stats_shared.o superslab_cache_shared.o superslab_ace_shared.o superslab_slab_shared.o superslab_backend_shared.o superslab_head_shared.o hakmem_smallmid_shared.o hakmem_smallmid_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/front_gate_classifier_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/unified_batch_box_shared.o core/box/prewarm_box_shared.o core/box/ss_hot_prewarm_box_shared.o core/box/front_metrics_box_shared.o core/box/bench_fast_box_shared.o core/box/pagefault_telemetry_box_shared.o core/box/tiny_sizeclass_hist_box_shared.o core/page_arena_shared.o core/front/tiny_unified_cache_shared.o core/tiny_alloc_fast_push_shared.o core/link_stubs_shared.o core/tiny_failfast_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_shared_pool_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o
# Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1)
ifeq ($(POOL_TLS_PHASE1),1)
@@ -223,7 +223,7 @@ endif
# Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
@@ -400,7 +400,7 @@ test-box-refactor: box-refactor
./larson_hakmem 10 8 128 1024 1 12345 4
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o

ss_legacy_backend_box.c

@@ -5,6 +5,7 @@
#include "ss_allocation_box.h"
#include "hakmem_tiny_config.h"
#include "hakmem_tiny.h" // For tiny_self_u32
#include "../tiny_region_id.h" // For tiny_region_id_write_header
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
@@ -88,7 +89,11 @@ void* hak_tiny_alloc_superslab_backend_hint(int class_idx)
g_ss_legacy_hint_ss[class_idx] = NULL;
}
#if HAKMEM_TINY_HEADER_CLASSIDX
return tiny_region_id_write_header(base, class_idx);
#else
return (void*)base;
#endif
}
// ============================================================================
@@ -156,7 +161,11 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
hak_tiny_ss_hint_record(class_idx, chunk, slab_idx);
meta->used++;
atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed);
#if HAKMEM_TINY_HEADER_CLASSIDX
return tiny_region_id_write_header(base, class_idx);
#else
return (void*)base;
#endif
}
}
chunk = chunk->next_chunk;
@@ -197,7 +206,11 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
hak_tiny_ss_hint_record(class_idx, new_chunk, slab_idx);
meta->used++;
atomic_fetch_add_explicit(&new_chunk->total_active_blocks, 1, memory_order_relaxed);
#if HAKMEM_TINY_HEADER_CLASSIDX
return tiny_region_id_write_header(base, class_idx);
#else
return (void*)base;
#endif
}
}

ss_unified_backend_box.c

@@ -7,6 +7,7 @@
#include "hakmem_shared_pool.h"
#include "hakmem_tiny_config.h"
#include "ss_allocation_box.h"
#include "../tiny_region_id.h" // For tiny_region_id_write_header
#include <stdio.h>
#include <stdlib.h>
@@ -109,7 +110,11 @@ void* hak_tiny_alloc_superslab_backend_shared(int class_idx)
hak_tiny_ss_hint_record(class_idx, ss, slab_idx);
#if HAKMEM_TINY_HEADER_CLASSIDX
return tiny_region_id_write_header(base, class_idx);
#else
return (void*)base;
#endif
}
// ============================================================================

File diff suppressed because it is too large

hakmem_tiny_superslab_internal.h (new file, 128 lines)

@@ -0,0 +1,128 @@
// hakmem_tiny_superslab_internal.h - Internal declarations for superslab refactor
// Purpose: Shared declarations between superslab implementation files
// License: MIT
// Date: 2025-11-28
#ifndef HAKMEM_TINY_SUPERSLAB_INTERNAL_H
#define HAKMEM_TINY_SUPERSLAB_INTERNAL_H
#include "hakmem_tiny_superslab.h"
#include "box/ss_hot_cold_box.h"
#include "hakmem_super_registry.h"
#include "hakmem_tiny.h"
#include "hakmem_tiny_config.h"
#include "hakmem_shared_pool.h"
#include "hakmem_internal.h"
#include "tiny_region_id.h"
#include "hakmem_tiny_integrity.h"
#include "box/tiny_next_ptr_box.h"
#include "box/slab_freelist_atomic.h"
#include <sys/mman.h>
#include <sys/resource.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
// ============================================================================
// Global Variables (defined in superslab_stats.c)
// ============================================================================
extern pthread_mutex_t g_superslab_lock;
extern uint64_t g_superslabs_allocated;
extern uint64_t g_superslabs_freed;
extern uint64_t g_bytes_allocated;
extern _Atomic uint64_t g_ss_active_dec_calls;
extern _Atomic uint64_t g_hak_tiny_free_calls;
extern _Atomic uint64_t g_ss_remote_push_calls;
extern _Atomic uint64_t g_free_ss_enter;
extern _Atomic uint64_t g_free_local_box_calls;
extern _Atomic uint64_t g_free_remote_box_calls;
extern uint64_t g_ss_alloc_by_class[8];
extern uint64_t g_ss_freed_by_class[8];
extern _Atomic uint64_t g_ss_mmap_count;
extern _Atomic uint64_t g_final_fallback_mmap_count;
// ============================================================================
// SuperSlabHead Management (defined in superslab_head.c)
// ============================================================================
extern SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS];
// ============================================================================
// Cache System (defined in superslab_cache.c)
// ============================================================================
typedef struct SuperslabCacheEntry {
struct SuperslabCacheEntry* next;
} SuperslabCacheEntry;
extern SuperslabCacheEntry* g_ss_cache_head[8];
extern size_t g_ss_cache_count[8];
extern size_t g_ss_cache_cap[8];
extern size_t g_ss_precharge_target[8];
extern _Atomic int g_ss_precharge_done[8];
extern int g_ss_cache_enabled;
extern pthread_once_t g_ss_cache_once;
extern pthread_mutex_t g_ss_cache_lock[8];
extern uint64_t g_ss_cache_hits[8];
extern uint64_t g_ss_cache_misses[8];
extern uint64_t g_ss_cache_puts[8];
extern uint64_t g_ss_cache_drops[8];
extern uint64_t g_ss_cache_precharged[8];
extern uint64_t g_superslabs_reused;
extern uint64_t g_superslabs_cached;
// Cache functions (defined in superslab_cache.c)
void ss_cache_global_init(void);
void ss_cache_ensure_init(void);
void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate);
void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask);
SuperslabCacheEntry* ss_cache_pop(uint8_t size_class);
int ss_cache_push(uint8_t size_class, SuperSlab* ss);
// ============================================================================
// ACE (Adaptive Cache Engine) - defined in superslab_ace.c
// ============================================================================
extern SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS];
extern int g_ss_force_lg;
extern _Atomic int g_ss_populate_once;
uint8_t hak_tiny_superslab_next_lg(int class_idx);
void ace_observe_and_decide(int k);
// ============================================================================
// Statistics (defined in superslab_stats.c)
// ============================================================================
void ss_stats_os_alloc(uint8_t size_class, size_t ss_size);
void ss_stats_cache_reuse(void);
void ss_stats_cache_store(void);
void log_superslab_oom_once(size_t ss_size, size_t alloc_size, int err);
// ============================================================================
// Slab Management (defined in superslab_slab.c)
// ============================================================================
// Drain remote MPSC stack into freelist (ownership already verified by caller)
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta);
// ============================================================================
// Backend Allocation (defined in superslab_backend.c)
// ============================================================================
void* hak_tiny_alloc_superslab_backend_legacy(int class_idx);
void* hak_tiny_alloc_superslab_backend_shared(int class_idx);
// ============================================================================
// SuperSlabHead Management (defined in superslab_head.c)
// ============================================================================
SuperSlabHead* init_superslab_head(int class_idx);
int expand_superslab_head(SuperSlabHead* head);
SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx);
#endif // HAKMEM_TINY_SUPERSLAB_INTERNAL_H

core/superslab_ace.c (new file, 230 lines)

@@ -0,0 +1,230 @@
// superslab_ace.c - ACE (Adaptive Cache Engine) for SuperSlab allocator
// Purpose: Dynamic SuperSlab size adaptation based on usage patterns
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// ACE (Adaptive Cache Engine) State
// ============================================================================
SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS] = {{0}};
int g_ss_force_lg = -1;
_Atomic int g_ss_populate_once = 0;
// ============================================================================
// ACE Helper Functions
// ============================================================================
// Forward: decide next SuperSlab lg for a class (ACE-aware, clamped)
uint8_t hak_tiny_superslab_next_lg(int class_idx)
{
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return SUPERSLAB_LG_DEFAULT;
}
// Prefer ACE target if within allowed range
uint8_t t = atomic_load_explicit((_Atomic uint8_t*)&g_ss_ace[class_idx].target_lg,
memory_order_relaxed);
if (t < SUPERSLAB_LG_MIN || t > SUPERSLAB_LG_MAX) {
return SUPERSLAB_LG_DEFAULT;
}
return t;
}
// ============================================================================
// ACE Statistics / Debugging
// ============================================================================
void superslab_ace_print_stats(void) {
printf("=== ACE (Adaptive Cache Engine) Stats ===\n");
const char* class_names[8] = {"8B", "16B", "24B", "32B", "40B", "48B", "56B", "64B"};
printf("Class Curr Targ Hot Allocs Refills Spills LiveBlks\n");
printf("--------------------------------------------------------------\n");
for (int i = 0; i < TINY_NUM_CLASSES_SS; i++) {
SuperSlabACEState* c = &g_ss_ace[i];
printf("%-6s %2uMB %2uMB %4u %7u %8u %7u %9u\n",
class_names[i],
(1u << c->current_lg) / (1024 * 1024),
(1u << c->target_lg) / (1024 * 1024),
c->hot_score,
c->alloc_count,
c->refill_count,
c->spill_count,
c->live_blocks);
}
printf("\n");
}
// ============================================================================
// ACE Tick Function (Promotion/Demotion Logic)
// ============================================================================
#define ACE_TICK_NS (150ULL * 1000 * 1000) // 150ms tick interval
#define ACE_COOLDOWN_NS (800ULL * 1000 * 1000) // 0.8s cooldown (anti-oscillation)
// Simplified thresholds for refill activity
#define HI_REFILL(k) (g_ss_ace[k].refill_count > 64) // High refill rate
#define MID_REFILL(k) (g_ss_ace[k].refill_count > 16) // Medium refill rate
// Object sizes per class (for capacity calculation)
// Must match TINY size classes: 8, 16, 24, 32, 40, 48, 56, 64 bytes
static const int g_tiny_obj_sizes[TINY_NUM_CLASSES_SS] = {8, 16, 24, 32, 40, 48, 56, 64};
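// Worked example (arithmetic from the constants above): for the 64B class,
// cap1MB = (1<<20)/64 = 16384 blocks and cap2MB = (1<<21)/64 = 32768.
// Promotion 1MB -> 2MB fires when live_blocks > 0.75*16384 = 12288 AND
// refill_count > 64; demotion 2MB -> 1MB fires when live_blocks <
// 0.35*32768 (~11469) AND refill_count <= 16, subject to the 0.8s cooldown.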
void hak_tiny_superslab_ace_tick(int k, uint64_t now) {
if (k < 0 || k >= TINY_NUM_CLASSES_SS) return;
SuperSlabACEState* c = &g_ss_ace[k];
// Rate limiting: only tick every ACE_TICK_NS (~150ms)
if (now - c->last_tick_ns < ACE_TICK_NS) return;
// Calculate capacity for 1MB and 2MB SuperSlabs
int obj_size = g_tiny_obj_sizes[k];
double cap1MB = (double)((1U << 20) / obj_size); // 1MB capacity
double cap2MB = (double)((1U << 21) / obj_size); // 2MB capacity
// Calculate hotness score (weighted: 60% live blocks, 40% refill rate)
double hot = 0.6 * (double)c->live_blocks + 0.4 * (double)c->refill_count;
if (hot < 0) hot = 0;
if (hot > 1000) hot = 1000;
c->hot_score = (uint16_t)hot;
// Cooldown mechanism: prevent size changes within 0.8s of last change
static uint64_t last_switch_ns[TINY_NUM_CLASSES_SS] = {0};
if (now - last_switch_ns[k] >= ACE_COOLDOWN_NS) {
if (c->current_lg <= 20) {
// Promotion condition: 1MB → 2MB
// High demand (live > 75% capacity) AND high refill rate
if (c->live_blocks > 0.75 * cap1MB && HI_REFILL(k)) {
c->target_lg = 21; // Promote to 2MB
last_switch_ns[k] = now;
}
} else {
// Demotion condition: 2MB → 1MB
// Low demand (live < 35% capacity) AND low refill rate
if (c->live_blocks < 0.35 * cap2MB && !MID_REFILL(k)) {
c->target_lg = 20; // Demote to 1MB
last_switch_ns[k] = now;
}
}
}
// EMA-style decay for counters (reduce by 75% each tick)
c->alloc_count = c->alloc_count / 4;
c->refill_count = c->refill_count / 4;
c->spill_count = c->spill_count / 4;
// live_blocks is updated incrementally by alloc/free, not decayed here
c->last_tick_ns = now;
}
// ============================================================================
// ACE Observer (Registry-based, zero hot-path overhead)
// ============================================================================
// Global debug flag (set once at initialization)
static int g_ace_debug = 0;
// Registry-based observation: scan all SuperSlabs for usage stats
void ace_observe_and_decide(int k) {
if (k < 0 || k >= TINY_NUM_CLASSES_SS) return;
SuperSlabACEState* c = &g_ss_ace[k];
// Scan Registry to count SuperSlabs and total live blocks
int ss_count = 0;
uint32_t total_live = 0;
for (int i = 0; i < SUPER_REG_SIZE; i++) {
SuperRegEntry* e = &g_super_reg[i];
// Atomic read (thread-safe)
uintptr_t base = atomic_load_explicit(
(_Atomic uintptr_t*)&e->base,
memory_order_acquire);
if (base == 0) continue; // Empty slot
// Phase 8.4: Safety check - skip if ss pointer is invalid
if (!e->ss) continue;
// Phase 12: per-SS size_class removed; registry entries are per-class by construction.
ss_count++;
// Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead)
uint32_t ss_live = 0;
int cap_scan = ss_slabs_capacity(e->ss);
for (int slab_idx = 0; slab_idx < cap_scan; slab_idx++) {
TinySlabMeta* meta = &e->ss->slabs[slab_idx];
// Relaxed read is OK (stats only, no hot-path impact)
ss_live += meta->used;
}
total_live += ss_live;
}
// Calculate utilization
int obj_size = g_tiny_obj_sizes[k];
uint8_t current_lg = atomic_load_explicit(
(_Atomic uint8_t*)&c->current_lg,
memory_order_relaxed);
uint32_t capacity = (ss_count > 0) ? ss_count * ((1U << current_lg) / obj_size) : 1;
double util = (double)total_live / capacity;
// Update hot_score (for debugging/visualization)
c->hot_score = (uint16_t)(util * 1000);
if (c->hot_score > 1000) c->hot_score = 1000;
// Promotion/Demotion decision
uint8_t new_target = current_lg;
if (current_lg <= 20) {
// Promotion: 1MB → 2MB
if (util > 0.75) {
new_target = 21;
}
} else {
// Demotion: 2MB → 1MB
if (util < 0.35) {
new_target = 20;
}
}
// Debug output (if enabled)
if (g_ace_debug && ss_count > 0) {
fprintf(stderr, "[ACE] Class %d (%dB): ss=%d live=%u cap=%u util=%.2f%% lg=%d->%d hot=%d\n",
k, obj_size, ss_count, total_live, capacity, util * 100.0,
current_lg, new_target, c->hot_score);
}
// Atomic write (thread-safe)
if (new_target != current_lg) {
atomic_store_explicit(
(_Atomic uint8_t*)&c->target_lg,
new_target,
memory_order_release);
if (g_ace_debug) {
fprintf(stderr, "[ACE] *** Class %d: SIZE CHANGE %dMB -> %dMB (util=%.2f%%)\n",
k, 1 << (current_lg - 20), 1 << (new_target - 20), util * 100.0);
}
}
}
// Called from Learner thread (background observation)
void hak_tiny_superslab_ace_observe_all(void) {
// Initialize debug flag once
static int initialized = 0;
if (!initialized) {
const char* ace_debug = getenv("HAKMEM_ACE_DEBUG");
g_ace_debug = (ace_debug && atoi(ace_debug) != 0) ? 1 : 0;
initialized = 1;
}
for (int k = 0; k < TINY_NUM_CLASSES_SS; k++) {
ace_observe_and_decide(k);
}
}

core/superslab_allocate.c (new file, 313 lines)

@@ -0,0 +1,313 @@
// superslab_allocate.c - SuperSlab allocation and deallocation
// Purpose: Main allocation/free entry points for SuperSlabs
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// SuperSlab Allocation (2MB aligned)
// ============================================================================
SuperSlab* superslab_allocate(uint8_t size_class) {
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → fail 1 in N allocations
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
static __thread unsigned long fault_tick = 0;
if (__builtin_expect(fault_rate == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
if (e && *e) {
int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
} else {
fault_rate = 0;
}
}
if (fault_rate > 0) {
unsigned long t = ++fault_tick;
if ((t % (unsigned long)fault_rate) == 0ul) {
return NULL; // simulate OOM
}
}
// Optional env clamp for SuperSlab size
static int env_parsed = 0;
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_DEFAULT; // Start with default (2MB)
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
if (!env_parsed) {
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
if (maxmb) {
int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
}
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
if (minmb) {
int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
}
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
if (force_lg_env && *force_lg_env) {
int v = atoi(force_lg_env);
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
g_ss_force_lg = v;
g_ss_min_lg_env = g_ss_max_lg_env = v;
}
}
size_t precharge_default = 0;
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
if (precharge_env && *precharge_env) {
long v = atol(precharge_env);
if (v < 0) v = 0;
precharge_default = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
size_t cache_default = 0;
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
if (cache_env && *cache_env) {
long v = atol(cache_env);
if (v < 0) v = 0;
cache_default = (size_t)v;
}
for (int i = 0; i < 8; i++) {
g_ss_cache_cap[i] = cache_default;
g_ss_precharge_target[i] = precharge_default;
}
for (int i = 0; i < 8; i++) {
char name[64];
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
char* cap_env = getenv(name);
if (cap_env && *cap_env) {
long v = atol(cap_env);
if (v < 0) v = 0;
g_ss_cache_cap[i] = (size_t)v;
}
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
char* pre_env = getenv(name);
if (pre_env && *pre_env) {
long v = atol(pre_env);
if (v < 0) v = 0;
g_ss_precharge_target[i] = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) {
g_ss_cache_enabled = 1;
}
}
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
if (populate_env && atoi(populate_env) != 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
env_parsed = 1;
}
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
uintptr_t ss_mask = ss_size - 1;
int from_cache = 0;
void* ptr = NULL;
// Debug logging flag (lazy init)
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
// Phase 9: Try LRU cache first (lazy deallocation)
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
if (cached_ss) {
ptr = (void*)cached_ss;
from_cache = 1;
// Debug logging for REFILL from LRU
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
size_class, (void*)cached_ss);
}
// Skip old cache path - LRU cache takes priority
} else if (g_ss_cache_enabled && size_class < 8) {
// Fallback to old cache (will be deprecated)
ss_cache_precharge(size_class, ss_size, ss_mask);
SuperslabCacheEntry* old_cached = ss_cache_pop(size_class);
if (old_cached) {
ptr = (void*)old_cached;
from_cache = 1;
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
size_class, (void*)old_cached);
}
}
}
if (!ptr) {
int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
if (!ptr) {
return NULL;
}
// Debug logging for REFILL with new allocation
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
size_class, (void*)ptr);
}
}
// Initialize SuperSlab header (Phase 12: no global size_class field)
SuperSlab* ss = (SuperSlab*)ptr;
ss->magic = SUPERSLAB_MAGIC;
ss->active_slabs = 0;
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
ss->slab_bitmap = 0;
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
ss->partial_epoch = 0;
ss->publish_hint = 0xFF;
// Initialize atomics explicitly
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
ss->partial_next = NULL;
// Phase 9: Initialize LRU fields
ss->last_used_ns = 0;
ss->generation = 0;
ss->lru_prev = NULL;
ss->lru_next = NULL;
// Phase 3d-C: Initialize Hot/Cold Split fields
ss->hot_count = 0;
ss->cold_count = 0;
for (int i = 0; i < 16; i++) {
ss->hot_indices[i] = 0;
ss->cold_indices[i] = 0;
}
// Initialize all slab metadata (only up to max slabs for this size)
int max_slabs = (int)(ss_size / SLAB_SIZE);
// PERF_OPT: memset removed - mmap() already returns zero-initialized pages
// Previous memset calls consumed 23.83% CPU time (perf analysis 2025-11-28)
// Measured improvement: +1.3% throughput (71.86M → 72.78M ops/s)
// Note: ASan/debug builds may need these, but production mmap guarantees zero pages
// memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
// memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
// memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
// memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
for (int i = 0; i < max_slabs; i++) {
// Phase 1: Atomic initialization (freelist + used are now _Atomic)
slab_freelist_store_relaxed(&ss->slabs[i], NULL); // Explicit NULL (pages are already zero from mmap, but makes intent clear)
atomic_store_explicit(&ss->slabs[i].used, 0, memory_order_relaxed);
ss->slabs[i].capacity = 0;
ss->slabs[i].owner_tid_low = 0;
// Initialize remote queue atomics (pages already zero from mmap, but use proper atomic init)
atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed);
atomic_store_explicit(&ss->slab_listed[i], 0, memory_order_relaxed);
}
if (from_cache) {
ss_stats_cache_reuse();
}
// Phase 8.3: Update ACE current_lg to match allocated size
g_ss_ace[size_class].current_lg = lg;
// Phase 1: Register SuperSlab in global registry for fast lookup
// CRITICAL: Register AFTER full initialization (ss structure is ready)
uintptr_t base = (uintptr_t)ss;
if (!hak_super_register(base, ss)) {
// Registry full - this is a fatal error
fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
// Still return ss to avoid memory leak, but lookups may fail
}
return ss;
}
// ============================================================================
// SuperSlab Deallocation
// ============================================================================
void superslab_free(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
return; // Invalid SuperSlab
}
// ADD DEBUG LOGGING
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
(void*)ss, ss->lg_size, ss->active_slabs);
}
// Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
size_t ss_size = (size_t)1 << ss->lg_size;
// Phase 1: Unregister SuperSlab from registry FIRST
// CRITICAL: Must unregister BEFORE adding to LRU cache
// Reason: Cached SuperSlabs should NOT be found by lookups
uintptr_t base = (uintptr_t)ss;
hak_super_unregister(base);
// Memory fence to ensure unregister is visible
atomic_thread_fence(memory_order_release);
// Phase 9: Try LRU cache first (lazy deallocation)
// NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
// Magic will be cleared on eviction or reuse
int lru_cached = hak_ss_lru_push(ss);
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
}
if (lru_cached) {
// Successfully cached in LRU - defer munmap
return;
}
// LRU cache full or disabled - try the old cache (class is unknown here, so class 0 is used)
int old_cached = ss_cache_push(0, ss);
if (old_cached) {
ss_stats_cache_store();
return;
}
// Both caches full - immediately free to OS (eager deallocation)
// Clear magic to prevent use-after-free
ss->magic = 0;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
(void*)ss, ss_size,
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif
munmap(ss, ss_size);
// Update statistics for actual release to OS
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_freed++;
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
g_bytes_allocated -= ss_size;
pthread_mutex_unlock(&g_superslab_lock);
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
(unsigned long long)g_superslabs_freed);
#endif
}

core/superslab_backend.c (new file, 281 lines)

@@ -0,0 +1,281 @@
// superslab_backend.c - Backend allocation paths for SuperSlab allocator
// Purpose: Legacy and shared pool backend implementations
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
/*
* superslab_return_block() - Single exit point for all SuperSlab allocations
*
* Purpose: Ensures consistent header writing across all allocation paths.
* This prevents bugs where headers are written in some paths but not others.
*
* Parameters:
* base - Block start address from SuperSlab geometry
* class_idx - Tiny class index (0-7)
*
* Returns:
* User pointer (base + 1 if headers enabled, base otherwise)
*
* Header writing behavior:
* - If HAKMEM_TINY_HEADER_CLASSIDX=1: Writes header via tiny_region_id_write_header()
* - If HAKMEM_TINY_HEADER_CLASSIDX=0: Returns base directly (no header)
*/
static inline void* superslab_return_block(void* base, int class_idx) {
#if HAKMEM_TINY_HEADER_CLASSIDX
return tiny_region_id_write_header(base, class_idx);
#else
return (void*)base;
#endif
}
/*
* Legacy backend for hak_tiny_alloc_superslab_box().
*
* Phase 12 Stage A/B:
* - Uses per-class SuperSlabHead (g_superslab_heads) as the implementation.
* - Callers MUST use hak_tiny_alloc_superslab_box() and never touch this directly.
* - Later Stage C: this function will be replaced by a shared_pool backend.
*/
void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
{
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
SuperSlabHead* head = g_superslab_heads[class_idx];
if (!head) {
head = init_superslab_head(class_idx);
if (!head) {
return NULL;
}
g_superslab_heads[class_idx] = head;
}
SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk;
while (chunk) {
int cap = ss_slabs_capacity(chunk);
for (int slab_idx = 0; slab_idx < cap; slab_idx++) {
TinySlabMeta* meta = &chunk->slabs[slab_idx];
// Skip slabs that belong to a different class (or are uninitialized).
if (meta->class_idx != (uint8_t)class_idx && meta->class_idx != 255) {
continue;
}
// P1.2 FIX: Initialize slab on first use (like shared backend does)
// This ensures class_map is populated for all slabs, not just slab 0
if (meta->capacity == 0) {
size_t block_size = g_tiny_class_sizes[class_idx];
uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(chunk, slab_idx, block_size, owner_tid);
meta = &chunk->slabs[slab_idx]; // Refresh pointer after init
meta->class_idx = (uint8_t)class_idx;
// P1.2: Update class_map for dynamic slab initialization
chunk->class_map[slab_idx] = (uint8_t)class_idx;
}
if (meta->used < meta->capacity) {
size_t stride = tiny_block_stride_for_class(class_idx);
size_t offset = (size_t)meta->used * stride;
uint8_t* base = (uint8_t*)chunk
+ SUPERSLAB_SLAB0_DATA_OFFSET
+ (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE
+ offset;
meta->used++;
atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed);
return superslab_return_block(base, class_idx);
}
}
chunk = chunk->next_chunk;
}
if (expand_superslab_head(head) < 0) {
return NULL;
}
SuperSlab* new_chunk = head->current_chunk;
if (!new_chunk) {
return NULL;
}
int cap2 = ss_slabs_capacity(new_chunk);
for (int slab_idx = 0; slab_idx < cap2; slab_idx++) {
TinySlabMeta* meta = &new_chunk->slabs[slab_idx];
// P1.2 FIX: Initialize slab on first use (like shared backend does)
if (meta->capacity == 0) {
size_t block_size = g_tiny_class_sizes[class_idx];
uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(new_chunk, slab_idx, block_size, owner_tid);
meta = &new_chunk->slabs[slab_idx]; // Refresh pointer after init
meta->class_idx = (uint8_t)class_idx;
// P1.2: Update class_map for dynamic slab initialization
new_chunk->class_map[slab_idx] = (uint8_t)class_idx;
}
if (meta->used < meta->capacity) {
size_t stride = tiny_block_stride_for_class(class_idx);
size_t offset = (size_t)meta->used * stride;
uint8_t* base = (uint8_t*)new_chunk
+ SUPERSLAB_SLAB0_DATA_OFFSET
+ (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE
+ offset;
meta->used++;
atomic_fetch_add_explicit(&new_chunk->total_active_blocks, 1, memory_order_relaxed);
return superslab_return_block(base, class_idx);
}
}
return NULL;
}
/*
* Shared pool backend for hak_tiny_alloc_superslab_box().
*
* Phase 12-2:
* - Uses SharedSuperSlabPool (g_shared_pool) to obtain a SuperSlab/slab
* for the requested class_idx.
* - This backend EXPRESSLY owns only:
* - choosing (ss, slab_idx) via shared_pool_acquire_slab()
* - initializing that slab's TinySlabMeta via superslab_init_slab()
* and nothing else; all callers must go through hak_tiny_alloc_superslab_box().
*
* - For now this is a minimal, conservative implementation:
* - One linear bump-run is carved from the acquired slab using tiny_block_stride_for_class().
* - No complex per-slab freelist or refill policy yet (Phase 12-3+).
* - If shared_pool_acquire_slab() fails, we fall back to legacy backend.
*/
void* hak_tiny_alloc_superslab_backend_shared(int class_idx)
{
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
SuperSlab* ss = NULL;
int slab_idx = -1;
if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss) {
// Shared pool could not provide a slab; caller may choose to fall back.
return NULL;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
// Defensive: shared_pool must either hand us an UNASSIGNED slab or one
// already bound to this class. Anything else is a hard bug.
if (meta->class_idx != 255 && meta->class_idx != (uint8_t)class_idx) {
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[HAKMEM][SS_SHARED] BUG: acquire_slab mismatch: cls=%d meta->class_idx=%u slab_idx=%d ss=%p\n",
class_idx, (unsigned)meta->class_idx, slab_idx, (void*)ss);
#endif
return NULL;
}
// Initialize slab geometry once for this class.
if (meta->capacity == 0) {
size_t block_size = g_tiny_class_sizes[class_idx];
// LARSON FIX: Pass actual thread ID for cross-thread free detection
uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(ss, slab_idx, block_size, my_tid);
meta = &ss->slabs[slab_idx];
// CRITICAL FIX: Always set class_idx after init to avoid C0/C7 confusion.
// New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
// Must explicitly set to requested class, not just when class_idx==255.
meta->class_idx = (uint8_t)class_idx;
// P1.1: Update class_map in shared acquire path
ss->class_map[slab_idx] = (uint8_t)class_idx;
}
// Final contract check before computing addresses.
if (meta->class_idx != (uint8_t)class_idx ||
meta->capacity == 0 ||
meta->used > meta->capacity) {
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[HAKMEM][SS_SHARED] BUG: invalid slab meta before alloc: "
"cls=%d slab_idx=%d meta_cls=%u used=%u cap=%u ss=%p\n",
class_idx, slab_idx,
(unsigned)meta->class_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(void*)ss);
#endif
return NULL;
}
// Simple bump allocation within this slab.
if (meta->used >= meta->capacity) {
// Slab exhausted: in minimal Phase12-2 backend we do not loop;
// caller or future logic must acquire another slab.
return NULL;
}
size_t stride = tiny_block_stride_for_class(class_idx);
size_t offset = (size_t)meta->used * stride;
// Phase 12-2 minimal geometry:
// - slab 0 data offset via SUPERSLAB_SLAB0_DATA_OFFSET
// - subsequent slabs at fixed SUPERSLAB_SLAB_USABLE_SIZE strides.
size_t slab_base_off = SUPERSLAB_SLAB0_DATA_OFFSET
+ (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE;
uint8_t* base = (uint8_t*)ss + slab_base_off + offset;
meta->used++;
atomic_fetch_add_explicit(&ss->total_active_blocks, 1, memory_order_relaxed);
return superslab_return_block(base, class_idx);
}
/*
* Box API entry:
* - Single front-door for tiny-side Superslab allocations.
*
* Phase 12 policy:
* - HAKMEM_TINY_SS_SHARED=0 → legacy backend only (for regression checks)
* - HAKMEM_TINY_SS_SHARED=1 → prefer the shared backend, fall back to legacy only on failure
*/
void* hak_tiny_alloc_superslab_box(int class_idx)
{
static int g_ss_shared_mode = -1;
static _Atomic uint32_t g_ss_backend_log = 0;
if (__builtin_expect(g_ss_shared_mode == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_SHARED");
if (!e || !*e) {
g_ss_shared_mode = 1; // default: shared enabled
} else {
int v = atoi(e);
g_ss_shared_mode = (v != 0) ? 1 : 0;
}
}
if (g_ss_shared_mode == 1) {
void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
if (p != NULL) {
uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p);
}
return p;
}
// If the shared backend fails, fall back to legacy to stay on the safe side
uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx);
}
return hak_tiny_alloc_superslab_backend_legacy(class_idx);
}
// With shared mode off, use legacy only
uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx);
}
return hak_tiny_alloc_superslab_backend_legacy(class_idx);
}
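// Usage note (illustrative invocations; the env var is parsed above):
//   HAKMEM_TINY_SS_SHARED=1 ./test_hakmem  -> shared backend first, legacy fallback (default)
//   HAKMEM_TINY_SS_SHARED=0 ./test_hakmem  -> legacy backend only (regression runs)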

core/superslab_cache.c (new file, 204 lines)

@@ -0,0 +1,204 @@
// superslab_cache.c - Cache management for SuperSlab allocator
// Purpose: LRU cache and old cache (prewarm) for SuperSlabs
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// Cache System - Global Variables
// ============================================================================
SuperslabCacheEntry* g_ss_cache_head[8] = {0};
size_t g_ss_cache_count[8] = {0};
size_t g_ss_cache_cap[8] = {0};
size_t g_ss_precharge_target[8] = {0};
_Atomic int g_ss_precharge_done[8] = {0};
int g_ss_cache_enabled = 0;
pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT;
pthread_mutex_t g_ss_cache_lock[8];
uint64_t g_ss_cache_hits[8] = {0};
uint64_t g_ss_cache_misses[8] = {0};
uint64_t g_ss_cache_puts[8] = {0};
uint64_t g_ss_cache_drops[8] = {0};
uint64_t g_ss_cache_precharged[8] = {0};
uint64_t g_superslabs_reused = 0;
uint64_t g_superslabs_cached = 0;
// ============================================================================
// Cache Initialization
// ============================================================================
void ss_cache_global_init(void) {
for (int i = 0; i < 8; i++) {
pthread_mutex_init(&g_ss_cache_lock[i], NULL);
}
}
void ss_cache_ensure_init(void) {
pthread_once(&g_ss_cache_once, ss_cache_global_init);
}
// ============================================================================
// OS Acquisition (mmap with alignment)
// ============================================================================
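// Strategy for the fallback path below (plain mmap cannot request alignment):
// over-allocate 2x ss_size, round the base up to the next ss_mask boundary,
// munmap the unused prefix, then munmap the suffix (or MADV_DONTNEED it when
// the mapping was pre-populated). Example with ss_size = 2MB (mask 0x1FFFFF):
// raw = 0x7f3a00500000 -> aligned = 0x7f3a00600000; the 1MB prefix and 1MB
// suffix are trimmed so exactly one aligned 2MB span remains.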
void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) {
void* ptr = NULL;
static int log_count = 0;
#ifdef MAP_ALIGNED_SUPER
int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#ifdef MAP_POPULATE
if (populate) {
map_flags |= MAP_POPULATE;
}
#endif
ptr = mmap(NULL, ss_size,
PROT_READ | PROT_WRITE,
map_flags,
-1, 0);
if (ptr != MAP_FAILED) {
atomic_fetch_add(&g_ss_mmap_count, 1);
if (((uintptr_t)ptr & ss_mask) == 0) {
ss_stats_os_alloc(size_class, ss_size);
return ptr;
}
munmap(ptr, ss_size);
ptr = NULL;
} else {
log_superslab_oom_once(ss_size, ss_size, errno);
}
#endif
size_t alloc_size = ss_size * 2;
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef MAP_POPULATE
if (populate) {
flags |= MAP_POPULATE;
}
#endif
void* raw = mmap(NULL, alloc_size,
PROT_READ | PROT_WRITE,
flags,
-1, 0);
if (raw != MAP_FAILED) {
uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1;
#if !HAKMEM_BUILD_RELEASE
if (log_count < 10) {
fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n",
(unsigned long)count, size_class, ss_size);
log_count++;
}
#endif
}
if (raw == MAP_FAILED) {
log_superslab_oom_once(ss_size, alloc_size, errno);
return NULL;
}
uintptr_t raw_addr = (uintptr_t)raw;
uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask;
ptr = (void*)aligned_addr;
size_t prefix_size = aligned_addr - raw_addr;
if (prefix_size > 0) {
munmap(raw, prefix_size);
}
size_t suffix_size = alloc_size - prefix_size - ss_size;
if (suffix_size > 0) {
if (populate) {
#ifdef MADV_DONTNEED
madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED);
#endif
} else {
munmap((char*)ptr + ss_size, suffix_size);
}
}
ss_stats_os_alloc(size_class, ss_size);
return ptr;
}
// ============================================================================
// Cache Precharge (prewarm)
// ============================================================================
void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) {
if (!g_ss_cache_enabled) return;
if (size_class >= 8) return;
if (g_ss_precharge_target[size_class] == 0) return;
if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return;
ss_cache_ensure_init();
pthread_mutex_lock(&g_ss_cache_lock[size_class]);
size_t target = g_ss_precharge_target[size_class];
size_t cap = g_ss_cache_cap[size_class];
size_t desired = target;
if (cap != 0 && desired > cap) {
desired = cap;
}
while (g_ss_cache_count[size_class] < desired) {
void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1);
if (!raw) {
break;
}
SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw;
entry->next = g_ss_cache_head[size_class];
g_ss_cache_head[size_class] = entry;
g_ss_cache_count[size_class]++;
g_ss_cache_precharged[size_class]++;
}
atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release);
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
}
// ============================================================================
// Cache Pop/Push Operations
// ============================================================================
SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) {
if (!g_ss_cache_enabled) return NULL;
if (size_class >= 8) return NULL;
ss_cache_ensure_init();
pthread_mutex_lock(&g_ss_cache_lock[size_class]);
SuperslabCacheEntry* entry = g_ss_cache_head[size_class];
if (entry) {
g_ss_cache_head[size_class] = entry->next;
if (g_ss_cache_count[size_class] > 0) {
g_ss_cache_count[size_class]--;
}
entry->next = NULL;
g_ss_cache_hits[size_class]++;
} else {
g_ss_cache_misses[size_class]++;
}
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
return entry;
}
int ss_cache_push(uint8_t size_class, SuperSlab* ss) {
if (!g_ss_cache_enabled) return 0;
if (size_class >= 8) return 0;
ss_cache_ensure_init();
pthread_mutex_lock(&g_ss_cache_lock[size_class]);
size_t cap = g_ss_cache_cap[size_class];
if (cap != 0 && g_ss_cache_count[size_class] >= cap) {
g_ss_cache_drops[size_class]++;
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
return 0;
}
SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss;
entry->next = g_ss_cache_head[size_class];
g_ss_cache_head[size_class] = entry;
g_ss_cache_count[size_class]++;
g_ss_cache_puts[size_class]++;
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
return 1;
}
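// Design note: this cache is intrusive - a cached SuperSlab's own memory is
// reinterpreted as a SuperslabCacheEntry (a single `next` pointer), so the
// free list requires no node allocations beyond the SuperSlabs themselves.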

core/superslab_head.c (new file, 176 lines)

@@ -0,0 +1,176 @@
// superslab_head.c - SuperSlabHead management for dynamic expansion
// Purpose: Per-class chunk lists and expansion logic
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// Phase 2a: Dynamic Expansion - Global per-class SuperSlabHeads
// ============================================================================
SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS] = {NULL};
// ============================================================================
// SuperSlabHead Management Functions
// ============================================================================
// Initialize SuperSlabHead for a class
SuperSlabHead* init_superslab_head(int class_idx) {
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
// Allocate SuperSlabHead structure
SuperSlabHead* head = (SuperSlabHead*)calloc(1, sizeof(SuperSlabHead));
if (!head) {
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate SuperSlabHead for class %d\n", class_idx);
g_hakmem_lock_depth--;
return NULL;
}
head->class_idx = (uint8_t)class_idx;
atomic_store_explicit(&head->total_chunks, 0, memory_order_relaxed);
head->first_chunk = NULL;
head->current_chunk = NULL;
pthread_mutex_init(&head->expansion_lock, NULL);
// Allocate initial chunk(s)
// Phase 2a: Start with 1 chunk for all classes (expansion handles growth).
// This reduces startup memory overhead while still allowing unlimited growth.
// (An earlier variant gave hot classes 1, 4, 6 two initial chunks to reduce
// contention; on-demand expansion supersedes that.)
int initial_chunks = 1;
for (int i = 0; i < initial_chunks; i++) {
if (expand_superslab_head(head) < 0) {
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate initial chunk %d for class %d\n",
i, class_idx);
g_hakmem_lock_depth--;
// Cleanup on failure
SuperSlab* chunk = head->first_chunk;
while (chunk) {
SuperSlab* next = chunk->next_chunk;
superslab_free(chunk);
chunk = next;
}
pthread_mutex_destroy(&head->expansion_lock);
free(head);
return NULL;
}
}
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[HAKMEM] Initialized SuperSlabHead for class %d: %zu initial chunks\n",
class_idx, atomic_load_explicit(&head->total_chunks, memory_order_relaxed));
#endif
g_hakmem_lock_depth--;
return head;
}
// Expand SuperSlabHead by allocating and linking a new chunk
int expand_superslab_head(SuperSlabHead* head) {
if (!head) {
return -1;
}
// Allocate new chunk via existing superslab_allocate
SuperSlab* new_chunk = superslab_allocate(head->class_idx);
if (!new_chunk) {
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate new chunk for class %d (system OOM)\n",
head->class_idx);
g_hakmem_lock_depth--;
#endif
return -1; // True OOM (system out of memory)
}
// CRITICAL FIX: Initialize slab 0 so bitmap != 0x00000000
// Phase 2a chunks must have at least one usable slab after allocation
size_t block_size = g_tiny_class_sizes[head->class_idx];
// Use pthread_self() directly since tiny_self_u32() is static inline in hakmem_tiny.c
uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(new_chunk, 0, block_size, owner_tid);
// Initialize the next_chunk link to NULL
new_chunk->next_chunk = NULL;
// Thread-safe linking
pthread_mutex_lock(&head->expansion_lock);
if (head->current_chunk) {
// Find the tail of the list (optimization: could cache tail pointer)
SuperSlab* tail = head->current_chunk;
while (tail->next_chunk) {
tail = tail->next_chunk;
}
tail->next_chunk = new_chunk;
} else {
// First chunk
head->first_chunk = new_chunk;
}
// Update current chunk to new chunk (for fast allocation)
head->current_chunk = new_chunk;
// Increment total chunks atomically
size_t old_count = atomic_fetch_add_explicit(&head->total_chunks, 1, memory_order_relaxed);
size_t new_count = old_count + 1;
pthread_mutex_unlock(&head->expansion_lock);
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] Expanded SuperSlabHead for class %d: %zu chunks now (bitmap=0x%08x)\n",
head->class_idx, new_count, new_chunk->slab_bitmap);
g_hakmem_lock_depth--;
#endif
return 0;
}
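// ---------------------------------------------------------------------------
// Sketch of the tail-caching optimization the comment above mentions,
// assuming a hypothetical last_chunk field added to SuperSlabHead (it does
// not exist in this commit). The locked region would then append in O(1):
//
//     pthread_mutex_lock(&head->expansion_lock);
//     if (head->last_chunk) {
//         head->last_chunk->next_chunk = new_chunk;  // no list walk
//     } else {
//         head->first_chunk = new_chunk;             // first chunk in list
//     }
//     head->last_chunk    = new_chunk;
//     head->current_chunk = new_chunk;
//     atomic_fetch_add_explicit(&head->total_chunks, 1, memory_order_relaxed);
//     pthread_mutex_unlock(&head->expansion_lock);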
// Find which chunk a pointer belongs to
SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx) {
if (!ptr || class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
SuperSlabHead* head = g_superslab_heads[class_idx];
if (!head) {
return NULL;
}
uintptr_t ptr_addr = (uintptr_t)ptr;
// Walk the chunk list
SuperSlab* chunk = head->first_chunk;
while (chunk) {
// Check if ptr is within this chunk's memory range
// Each chunk is aligned to SUPERSLAB_SIZE (1MB or 2MB)
uintptr_t chunk_start = (uintptr_t)chunk;
size_t chunk_size = (size_t)1 << chunk->lg_size; // Use actual chunk size
uintptr_t chunk_end = chunk_start + chunk_size;
if (ptr_addr >= chunk_start && ptr_addr < chunk_end) {
// Found the chunk
return chunk;
}
chunk = chunk->next_chunk;
}
return NULL; // Not found in any chunk
}
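// ---------------------------------------------------------------------------
// Sketch: because each chunk is aligned to its own 2^lg_size boundary, the
// list walk above could in principle be replaced by an address mask plus a
// membership check. is_registered_superslab() is a hypothetical validity
// probe (e.g. against the super registry); dereferencing an unverified
// candidate would be unsafe, so it stays commented out here.
//
//     static SuperSlab* find_chunk_fast(void* ptr, unsigned lg_size) {
//         uintptr_t mask = ((uintptr_t)1 << lg_size) - 1;
//         SuperSlab* cand = (SuperSlab*)((uintptr_t)ptr & ~mask);
//         return is_registered_superslab(cand) ? cand : NULL;
//     }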

core/superslab_slab.c (new file, 210 lines)

@@ -0,0 +1,210 @@
// superslab_slab.c - Slab initialization and management
// Purpose: Slab lifecycle and bitmap management within SuperSlabs
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// Remote Drain (MPSC queue to freelist conversion)
// ============================================================================
// Drain remote MPSC stack into freelist (ownership already verified by caller)
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta)
{
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;
static _Atomic uint32_t g_remote_drain_diag_once = 0;
static int g_remote_drain_diag_en = -1;
// Atomically take the whole remote list
uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0,
memory_order_acq_rel);
if (head == 0) return;
// Convert remote stack (offset 0 next) into freelist encoding via Box API
// and splice in front of current freelist preserving relative order.
void* prev = meta->freelist;
int cls = (int)meta->class_idx;
HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
static _Atomic int g_remote_drain_cls_oob = 0;
if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) {
fprintf(stderr,
"[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n",
(void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head);
}
return;
}
uintptr_t cur = head;
while (cur != 0) {
uintptr_t next = *(uintptr_t*)cur; // remote-next stored at offset 0
#if !HAKMEM_BUILD_RELEASE
if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
g_remote_drain_diag_en = (e && *e && *e != '0') ? 1 : 0;
}
#else
if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
g_remote_drain_diag_en = 0;
}
#endif
if (__builtin_expect(g_remote_drain_diag_en, 0)) {
uintptr_t addr = (uintptr_t)next;
if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) {
uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n",
cls,
slab_idx,
(void*)cur,
(void*)next,
(unsigned long)head,
prev,
(unsigned)meta->used);
}
}
#if HAKMEM_TINY_HEADER_CLASSIDX
int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1);
if (hdr_cls >= 0 && hdr_cls != cls) {
uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n",
cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head);
}
}
#endif
}
#if HAKMEM_TINY_HEADER_CLASSIDX
// Cross-check header vs meta before writing next (even if diag is off)
{
int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1);
if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) {
static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed);
if (n < 16) {
fprintf(stderr,
"[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n",
cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx);
}
}
}
#endif
// Restore header for header-classes (class 1-6) which were clobbered by remote push
#if HAKMEM_TINY_HEADER_CLASSIDX
if (cls != 0) {
uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
*(uint8_t*)(uintptr_t)cur = expected;
}
#endif
// Rewrite next pointer to Box representation for this class
tiny_next_write(cls, (void*)cur, prev);
prev = (void*)cur;
cur = next;
}
meta->freelist = prev;
// Reset remote count after full drain
atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);
// Update freelist/nonempty visibility bits
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
}
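// ---------------------------------------------------------------------------
// For orientation: the drain above pairs with a producer-side push that lives
// elsewhere in the tree (see g_ss_remote_push_calls). A minimal sketch of
// such a push (the real implementation may differ) shows why the drain has
// to restore the header byte: the push stores its next link at offset 0 of
// the freed block, clobbering the header for header-bearing classes.
static void remote_push_sketch(SuperSlab* ss, int slab_idx, void* block) {
    uintptr_t node = (uintptr_t)block;
    uintptr_t old_head = atomic_load_explicit(&ss->remote_heads[slab_idx],
                                              memory_order_relaxed);
    do {
        *(uintptr_t*)node = old_head;  // next link at offset 0 (clobbers header)
    } while (!atomic_compare_exchange_weak_explicit(
                 &ss->remote_heads[slab_idx], &old_head, node,
                 memory_order_release,   // publish node contents to the drainer
                 memory_order_relaxed));
    atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1,
                              memory_order_relaxed);
}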
// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid)
{
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
return;
}
// Phase E1-CORRECT unified geometry:
// - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
// - usable bytes are determined by slab index (slab0 vs others)
// - capacity = usable / stride for ALL classes (including former C7)
size_t usable_size = (slab_idx == 0)
? SUPERSLAB_SLAB0_USABLE_SIZE
: SUPERSLAB_SLAB_USABLE_SIZE;
size_t stride = block_size;
uint16_t capacity = (uint16_t)(usable_size / stride);
#if !HAKMEM_BUILD_RELEASE
if (slab_idx == 0) {
fprintf(stderr,
"[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
usable_size, stride, (unsigned)capacity);
}
#endif
TinySlabMeta* meta = &ss->slabs[slab_idx];
meta->freelist = NULL; // NULL = linear allocation mode
meta->used = 0;
meta->active = 0; // P1.3: blocks in use by user (starts at 0)
meta->tls_cached = 0; // P2.2: blocks cached in TLS SLL (starts at 0)
meta->capacity = capacity;
meta->carved = 0;
// LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);
// Fail-safe: stamp class_idx from geometry (stride → class).
// This ensures legacy/shared/legacy-refill paths all end with a correct class.
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
if (g_tiny_class_sizes[i] == stride) {
meta->class_idx = (uint8_t)i;
// P1.1: Update class_map for out-of-band lookup on free path
ss->class_map[slab_idx] = (uint8_t)i;
break;
}
}
superslab_activate_slab(ss, slab_idx);
}
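// ---------------------------------------------------------------------------
// Worked example of the geometry above, with illustrative numbers (the real
// SUPERSLAB_*_USABLE_SIZE constants come from the internal header): with an
// assumed usable size of 63 KiB (64512 bytes) and a 128-byte stride,
// capacity = 64512 / 128 = 504 blocks. The division truncates, so any
// remainder bytes at the end of the slab are simply never carved.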
// ============================================================================
// Slab Bitmap Management
// ============================================================================
void superslab_activate_slab(SuperSlab* ss, int slab_idx) {
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
return;
}
uint32_t mask = 1u << slab_idx;
if ((ss->slab_bitmap & mask) == 0) {
ss->slab_bitmap |= mask;
ss->active_slabs++;
// Phase 3d-C: Update hot/cold indices after activating new slab
ss_update_hot_cold_indices(ss);
}
}
void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) {
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
return;
}
uint32_t mask = 1u << slab_idx;
if (ss->slab_bitmap & mask) {
ss->slab_bitmap &= ~mask;
ss->active_slabs--;
}
}
int superslab_find_free_slab(SuperSlab* ss) {
if (!ss) return -1;
if ((int)ss->active_slabs >= ss_slabs_capacity(ss)) {
return -1; // No free slabs
}
// Find first 0 bit in bitmap
int cap = ss_slabs_capacity(ss);
for (int i = 0; i < cap; i++) {
if ((ss->slab_bitmap & (1u << i)) == 0) {
return i;
}
}
return -1;
}
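// ---------------------------------------------------------------------------
// Branch-light alternative sketch for the scan above, assuming slab_bitmap
// stays a plain uint32_t: invert the bitmap and take its lowest set bit.
static int superslab_find_free_slab_ctz(uint32_t slab_bitmap, int cap) {
    uint32_t inverted = ~slab_bitmap;
    if (inverted == 0) return -1;       // all 32 slots occupied
    int idx = __builtin_ctz(inverted);  // index of lowest clear bit
    return (idx < cap) ? idx : -1;      // honor this superslab's capacity
}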

core/superslab_stats.c (new file, 166 lines)

@@ -0,0 +1,166 @@
// superslab_stats.c - Statistics and debugging for SuperSlab allocator
// Purpose: Tracking and reporting allocation statistics
// License: MIT
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
// ============================================================================
// Global Statistics
// ============================================================================
pthread_mutex_t g_superslab_lock = PTHREAD_MUTEX_INITIALIZER;
uint64_t g_superslabs_allocated = 0; // Non-static for debugging
uint64_t g_superslabs_freed = 0; // Phase 7.6: Non-static for test access
uint64_t g_bytes_allocated = 0; // Non-static for debugging
// Debug counters
_Atomic uint64_t g_ss_active_dec_calls = 0;
_Atomic uint64_t g_hak_tiny_free_calls = 0;
_Atomic uint64_t g_ss_remote_push_calls = 0;
// Free path instrumentation (lightweight, for OOM/route diagnosis)
_Atomic uint64_t g_free_ss_enter = 0; // hak_tiny_free_superslab() entries
_Atomic uint64_t g_free_local_box_calls = 0; // same-thread freelist pushes
_Atomic uint64_t g_free_remote_box_calls = 0; // cross-thread remote pushes
// Per-class counters for gating/metrics (Tiny classes = 8)
uint64_t g_ss_alloc_by_class[8] = {0};
uint64_t g_ss_freed_by_class[8] = {0};
// Global counters for debugging (non-static for external access)
_Atomic uint64_t g_ss_mmap_count = 0;
_Atomic uint64_t g_final_fallback_mmap_count = 0;
// ============================================================================
// Statistics Functions
// ============================================================================
void ss_stats_os_alloc(uint8_t size_class, size_t ss_size) {
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_allocated++;
if (size_class < 8) {
g_ss_alloc_by_class[size_class]++;
}
g_bytes_allocated += ss_size;
pthread_mutex_unlock(&g_superslab_lock);
}
void ss_stats_cache_reuse(void) {
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_reused++;
pthread_mutex_unlock(&g_superslab_lock);
}
void ss_stats_cache_store(void) {
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_cached++;
pthread_mutex_unlock(&g_superslab_lock);
}
// ============================================================================
// Diagnostics
// ============================================================================
void log_superslab_oom_once(size_t ss_size, size_t alloc_size, int err) {
static int logged = 0;
if (logged) return;
logged = 1;
// CRITICAL FIX: Increment lock depth FIRST before any LIBC calls
// fopen/fclose/getrlimit/fprintf all may call malloc internally
// Must bypass HAKMEM wrapper to avoid header mismatch crash
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++; // Force wrapper to use __libc_malloc
struct rlimit rl = {0};
if (getrlimit(RLIMIT_AS, &rl) != 0) {
rl.rlim_cur = RLIM_INFINITY;
rl.rlim_max = RLIM_INFINITY;
}
unsigned long vm_size_kb = 0;
unsigned long vm_rss_kb = 0;
FILE* status = fopen("/proc/self/status", "r");
if (status) {
char line[256];
while (fgets(line, sizeof(line), status)) {
if (strncmp(line, "VmSize:", 7) == 0) {
(void)sscanf(line + 7, "%lu", &vm_size_kb);
} else if (strncmp(line, "VmRSS:", 6) == 0) {
(void)sscanf(line + 6, "%lu", &vm_rss_kb);
}
}
fclose(status);
}
// CRITICAL FIX: Do NOT decrement lock_depth yet!
// fprintf() below may call malloc for buffering
char rl_cur_buf[32];
char rl_max_buf[32];
if (rl.rlim_cur == RLIM_INFINITY) {
strcpy(rl_cur_buf, "inf");
} else {
snprintf(rl_cur_buf, sizeof(rl_cur_buf), "%llu", (unsigned long long)rl.rlim_cur);
}
if (rl.rlim_max == RLIM_INFINITY) {
strcpy(rl_max_buf, "inf");
} else {
snprintf(rl_max_buf, sizeof(rl_max_buf), "%llu", (unsigned long long)rl.rlim_max);
}
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[SS OOM] mmap failed: err=%d ss_size=%zu alloc_size=%zu "
"alloc=%llu freed=%llu bytes=%llu "
"RLIMIT_AS(cur=%s max=%s) VmSize=%lu kB VmRSS=%lu kB\n",
err,
ss_size,
alloc_size,
(unsigned long long)g_superslabs_allocated,
(unsigned long long)g_superslabs_freed,
(unsigned long long)g_bytes_allocated,
rl_cur_buf,
rl_max_buf,
vm_size_kb,
vm_rss_kb);
#endif
g_hakmem_lock_depth--; // Now safe to restore (all libc calls complete)
}
// ============================================================================
// Statistics / Debugging
// ============================================================================
void superslab_print_stats(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
printf("Invalid SuperSlab\n");
return;
}
printf("=== SuperSlab Stats ===\n");
printf("Address: %p\n", (void*)ss);
// Phase 12: per-SS size_class removed; classes are per-slab via meta->class_idx.
printf("Active slabs: %u / %d\n", ss->active_slabs, ss_slabs_capacity(ss));
printf("Bitmap: 0x%08X\n", ss->slab_bitmap);
printf("\nPer-slab details:\n");
for (int i = 0; i < ss_slabs_capacity(ss); i++) {
if (ss->slab_bitmap & (1u << i)) {
TinySlabMeta* meta = &ss->slabs[i];
printf(" Slab %2d: used=%u/%u freelist=%p class=%u owner_tid_low=%u\n",
i, meta->used, meta->capacity, meta->freelist,
(unsigned)meta->class_idx, (unsigned)meta->owner_tid_low);
}
}
printf("\n");
}
// Global statistics
void superslab_print_global_stats(void) {
pthread_mutex_lock(&g_superslab_lock);
printf("=== Global SuperSlab Stats ===\n");
printf("SuperSlabs allocated: %lu\n", g_superslabs_allocated);
printf("SuperSlabs freed: %lu\n", g_superslabs_freed);
printf("SuperSlabs active: %lu\n", g_superslabs_allocated - g_superslabs_freed);
printf("Total bytes allocated: %lu MB\n", g_bytes_allocated / (1024 * 1024));
pthread_mutex_unlock(&g_superslab_lock);
}

@@ -290,25 +290,18 @@ static inline int tiny_region_id_read_header(void* ptr) {
// CRITICAL FIX (Pool TLS Phase 1): ALWAYS validate magic when Pool TLS is enabled
// Reason: Pool TLS uses different magic (0xb0 vs 0xa0), MUST distinguish them!
// Without this, Pool TLS allocations are wrongly routed to Tiny freelist → corruption
#if !HAKMEM_BUILD_RELEASE || defined(HAKMEM_POOL_TLS_PHASE1)
// Debug/Development OR Pool TLS: Validate magic byte to catch non-header allocations
// Reason: Mid/Large allocations don't have headers, must detect and reject them
// Always validate magic byte to catch non-header allocations (release included).
// Reason: mmap-zero or mid/large frees can otherwise be misrouted as class 0.
uint8_t magic = header & 0xF0;
#if HAKMEM_DEBUG_VERBOSE
#if HAKMEM_DEBUG_VERBOSE
static int debug_count = 0;
if (debug_count < 5) {
fprintf(stderr, "[TINY_READ_HEADER] ptr=%p header=0x%02x magic=0x%02x expected=0x%02x\n",
ptr, header, magic, HEADER_MAGIC);
debug_count++;
}
#endif
#endif
if (magic != HEADER_MAGIC) {
// Invalid header - likely non-header allocation (Mid/Large/Pool TLS)
#if HAKMEM_DEBUG_VERBOSE
if (debug_count < 6) { // One more after the 5 above
fprintf(stderr, "[TINY_READ_HEADER] REJECTING ptr=%p (magic mismatch)\n", ptr);
}
#endif
#if !HAKMEM_BUILD_RELEASE
static int invalid_count = 0;
if (invalid_count < 5) {
@@ -322,12 +315,6 @@ static inline int tiny_region_id_read_header(void* ptr) {
if (tiny_guard_is_enabled()) tiny_guard_on_invalid(ptr, header);
return -1;
}
#else
// Release (without Pool TLS): Skip magic validation (save 2-3 cycles)
// Safety: Bounds check below still prevents out-of-bounds array access
// Trade-off: Mid/Large frees may corrupt TLS freelist (rare, ~0.1% of frees)
// NOTE: This optimization is DISABLED when Pool TLS is enabled (different magic bytes!)
#endif
int class_idx = (int)(header & HEADER_CLASS_MASK);
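// ---------------------------------------------------------------------------
// For reference, the byte this check parses packs a magic high nibble and a
// class index. Sketch using the 0xa0 (Tiny) / 0xb0 (Pool TLS) values named
// in the comments above; the 0x07 class mask is an assumption covering the
// eight tiny classes.
//
//     header = 0xa0 | class_idx        (written on allocation)
//     magic  = header & 0xF0           (must equal 0xa0 to be Tiny)
//     class  = header & 0x07           (0..7)
//
// Example: class 5 encodes to 0xa5; a Pool TLS block (0xb5) fails the magic
// check and is rejected with -1 rather than being misrouted to a freelist.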