Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
90 lines
3.4 KiB
C
90 lines
3.4 KiB
C
// hakmem_l25_pool.h - L2.5 LargePool (64KB-1MB Large-Size Allocations)
|
|
// Purpose: Large-size pool with page-granular management
|
|
//
|
|
// Design Philosophy:
|
|
// - **5 size classes**: 64KB, 128KB, 256KB, 512KB, 1MB
|
|
// - **64KB page granularity**: 1 page (64KB), 2 pages (128KB), 4 pages (256KB), etc.
|
|
// - **Page-dedicated freelist**: 1 size/1 page bundle for locality
|
|
// - **O(1) site→shard mapping**: `shard = (pc >> 4) & (SHARDS-1)`
|
|
// - **non-empty bitmap**: O(1) empty class skip (L2 Pool pattern)
|
|
//
|
|
// Target Workloads:
|
|
// - mir (256KB): +47.8% → target < +20% (3x speedup)
|
|
// - Large allocations between L2 Pool (32KB) and BigCache (1MB)
|
|
//
|
|
// Integration: Called by hakmem.c between L2 Pool (< 64KB) and BigCache (>= 1MB)
|
|
//
|
|
// License: MIT
|
|
// Date: 2025-10-21
|
|
|
|
#ifndef HAKMEM_L25_POOL_H
|
|
#define HAKMEM_L25_POOL_H
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
// ===========================================================================
|
|
// Configuration Constants
|
|
// ===========================================================================
|
|
|
|
#define L25_NUM_CLASSES 5 // 64KB, 128KB, 256KB, 512KB, 1MB
|
|
#define L25_PAGE_SIZE (64 * 1024) // 64KB per page unit
|
|
#define L25_NUM_SHARDS 64 // Site-based sharding (power of 2)
|
|
|
|
// Size class boundaries (in bytes)
|
|
#define L25_CLASS_64KB (64 * 1024)
|
|
#define L25_CLASS_128KB (128 * 1024)
|
|
#define L25_CLASS_256KB (256 * 1024)
|
|
#define L25_CLASS_512KB (512 * 1024)
|
|
#define L25_CLASS_1MB (1024 * 1024)
|
|
|
|
// Minimum/maximum size handled by L2.5 pool
|
|
#define L25_MIN_SIZE L25_CLASS_64KB // 64KB minimum
|
|
#define L25_MAX_SIZE L25_CLASS_1MB // 1MB maximum
|
|
|
|
// ===========================================================================
|
|
// Public API
|
|
// ===========================================================================
|
|
|
|
// Initialize L2.5 pool system (called by hak_init)
|
|
void hak_l25_pool_init(void);
|
|
|
|
// Shutdown L2.5 pool system and release all pages
|
|
void hak_l25_pool_shutdown(void);
|
|
|
|
// Try to allocate from L2.5 pool (returns NULL if size not in range)
|
|
// Args: size - requested allocation size (64KB-1MB)
|
|
// site_id - call-site address (for shard selection)
|
|
// Returns: Pointer to allocated block, or NULL if pool unavailable
|
|
void* hak_l25_pool_try_alloc(size_t size, uintptr_t site_id);
|
|
|
|
// Free block back to L2.5 pool
|
|
// Args: ptr - pointer to block (from hak_l25_pool_try_alloc)
|
|
// size - original allocation size (for class determination)
|
|
// site_id - call-site address (for shard routing)
|
|
void hak_l25_pool_free(void* ptr, size_t size, uintptr_t site_id);
|
|
|
|
// Print L2.5 pool statistics (called by hak_shutdown)
|
|
void hak_l25_pool_print_stats(void);
|
|
|
|
// Stats snapshot (per-class counters). Arrays must have length L25_NUM_CLASSES.
|
|
void hak_l25_pool_stats_snapshot(uint64_t hits[], uint64_t misses[], uint64_t refills[], uint64_t frees[]);
|
|
|
|
// ===========================================================================
|
|
// Internal Helpers (for testing/debugging)
|
|
// ===========================================================================
|
|
|
|
// Get shard index from site_id (0-63)
|
|
int hak_l25_pool_get_shard_index(uintptr_t site_id);
|
|
|
|
// Check if size is in L2.5 pool range (64KB-1MB)
|
|
static inline int hak_l25_pool_is_poolable(size_t size) {
|
|
return size >= L25_MIN_SIZE && size <= L25_MAX_SIZE;
|
|
}
|
|
|
|
// Headerless L2.5 lookup/free (for hak_free_at fast path)
|
|
int hak_l25_lookup(void* user_ptr, size_t* out_size);
|
|
void hak_l25_pool_free_fast(void* user_ptr, uintptr_t site_id);
|
|
|
|
#endif // HAKMEM_L25_POOL_H
|