Files
hakmem/core/hakmem_mid_mt.h

276 lines
8.9 KiB
C
Raw Normal View History

/**
* hakmem_mid_mt.h
*
* Mid Range Multi-threaded Allocator (8-32KB)
* mimalloc-style per-thread segment design for optimal MT performance
*
* Part of Hybrid Approach:
* - <=1KB: Tiny Pool (static optimization, P0 complete)
* - 8-32KB: Mid MT (this module, mimalloc-style per-thread)
* - >=64KB: Large Pool (learning-based, ELO strategies)
*
* Created: 2025-11-01
* Goal: 46M -> 100-120M ops/s (2.2-2.6x improvement)
*/
#ifndef HAKMEM_MID_MT_H
#define HAKMEM_MID_MT_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Size Classes
// ============================================================================
// Size-class indices. MID_NUM_CLASSES sizes the per-class arrays declared in
// this header (g_mid_segments and the lookup table in mid_class_to_size()).
#define MID_SIZE_CLASS_8K 0 // 8KB blocks
#define MID_SIZE_CLASS_16K 1 // 16KB blocks
#define MID_SIZE_CLASS_32K 2 // 32KB blocks
#define MID_NUM_CLASSES 3 // Total number of size classes
// Inclusive byte bounds of the request sizes this module serves; these are the
// limits tested by mid_is_in_range().
#define MID_MIN_SIZE (8 * 1024) // 8KB
#define MID_MAX_SIZE (32 * 1024) // 32KB
// NOTE(review): this header also defines MID_DEFAULT_CHUNK_SIZE as 64KB, and the
// MidThreadSegment field comment says chunks are "typically 64KB". Confirm which
// of the two chunk sizes the implementation actually uses.
#define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments)
// ============================================================================
// Data Structures
// ============================================================================
/**
 * MidThreadSegment - per-thread allocation state for one size class
 *
 * The whole struct is aligned to 64 bytes (one cache line on the targets
 * this layout was written for). With 8-byte pointers and size_t the
 * members pack as follows:
 *   - bytes  0-31: fast-path fields (free_list, current, end, used_count)
 *   - bytes 32-63: chunk metadata (chunk_base, chunk_size, block_size, capacity)
 *   - bytes 64-87: statistics counters
 * so the fast-path and metadata groups share the first 64-byte line and the
 * counters begin the second one; sizeof is rounded up to 128 by the alignment.
 *
 * The statistics members are always part of the struct; nothing in this
 * header compiles them out.
 */
typedef struct MidThreadSegment {
    /* --- Fast path (first 32 bytes on LP64) --- */
    void     *free_list;    /* Head of the freed-block linked list, NULL when empty */
    void     *current;      /* Bump-allocation cursor */
    void     *end;          /* End of the current chunk */
    uint32_t  used_count;   /* Number of blocks currently allocated */
    uint32_t  padding0;     /* Explicit padding, keeps the next group 8-byte aligned */

    /* --- Chunk metadata (next 32 bytes on LP64) --- */
    void     *chunk_base;   /* Base address of the current chunk */
    size_t    chunk_size;   /* Chunk size in bytes (original note: typically 64KB) */
    size_t    block_size;   /* Size of each block: 8KB, 16KB or 32KB */
    uint32_t  capacity;     /* Total number of blocks the chunk holds */
    uint32_t  padding1;     /* Explicit padding */

    /* --- Statistics (start of the second 64-byte line on LP64) --- */
    uint64_t  alloc_count;  /* Total allocations */
    uint64_t  free_count;   /* Total frees */
    uint32_t  refill_count; /* Number of chunk refills */
    uint32_t  padding2;     /* Explicit padding */
} __attribute__((aligned(64))) MidThreadSegment;
/**
 * MidSegmentRegistry - one record of the global segment table
 *
 * Describes a single segment so that the free() path can map a pointer back
 * to its block size and size class. Per the original design notes, records
 * are kept sorted by base address so lookups can binary-search in O(log N);
 * the code that maintains that order is not part of this header.
 */
typedef struct MidSegmentRegistry {
    void   *base;        /* Segment base address */
    size_t  block_size;  /* Block size in bytes: 8KB, 16KB or 32KB */
    int     class_idx;   /* Size class index (0-2) */
    int     padding;     /* Explicit padding */
} MidSegmentRegistry;
/**
 * MidGlobalRegistry - growable table of MidSegmentRegistry records
 *
 * Documented contract: `lock` is held around registry operations, which are
 * expected to be infrequent. The locking code itself lives outside this
 * header.
 */
typedef struct MidGlobalRegistry {
    MidSegmentRegistry *entries;   /* Dynamically sized array of records */
    uint32_t            count;     /* Records currently in use */
    uint32_t            capacity;  /* Records the array can hold */
    pthread_mutex_t     lock;      /* Mutex guarding the registry */
} MidGlobalRegistry;
// ============================================================================
// Global Variables
// ============================================================================
// TLS: Each thread has its own segments (lock-free!)
// The array has MID_NUM_CLASSES entries, presumably one per size class and
// indexed by the MID_SIZE_CLASS_* values - confirm against the implementation.
extern __thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES];
// Global registry (protected by lock)
// A single instance shared by all threads; the mutex is its `lock` member.
extern MidGlobalRegistry g_mid_registry;
// ============================================================================
// API Functions
// ============================================================================
/**
* mid_mt_init - Initialize Mid Range MT allocator
*
* Takes no arguments and returns nothing, so a failure cannot be reported
* to the caller through this interface.
*
* Call once at startup. Documented as thread-safe and idempotent; the
* implementation is not part of this header.
*/
void mid_mt_init(void);
/**
* mid_mt_alloc - Allocate memory from Mid Range pool (8-32KB)
*
* @param size Allocation size in bytes (must satisfy MID_MIN_SIZE <= size <= MID_MAX_SIZE)
* @return Allocated pointer (aligned to block_size), or NULL on failure
*
* Thread-safety: Lock-free (uses TLS)
* Performance: O(1) fast path, O(1) amortized
*
* Fast path:
* 1. Check free_list (most common, ~4-5 instructions)
* 2. Bump allocation if free_list empty (~6-8 instructions)
* 3. Refill chunk if segment exhausted (rare, ~0.1%)
*
* NOTE(review): the instruction counts and percentages above are the author's
* design notes. This header does not specify what happens when size is
* outside the documented range.
*/
void* mid_mt_alloc(size_t size);
/**
* mid_mt_free - Free memory allocated by mid_mt_alloc
*
* @param ptr Pointer to free (must be from mid_mt_alloc)
* @param size Original allocation size in bytes (for size class lookup)
*
* Thread-safety: Lock-free if freeing to own thread's segment
* Requires registry lock if remote free (cross-thread)
* Performance: O(1) local free, O(log N) remote free (registry lookup)
*
* Note: Phase 1 implementation does not handle remote free (memory leak),
* i.e. a block freed by a thread other than the one that allocated it is
* documented as not being reclaimed.
* Phase 2 will implement per-segment atomic remote free list
*
* NOTE(review): behaviour for a NULL ptr or a mismatched size is not
* specified in this header.
*/
void mid_mt_free(void* ptr, size_t size);
/**
* mid_mt_thread_exit - Cleanup thread-local segments
*
* Called on thread exit to release resources
* Should be registered via pthread_key_create or __attribute__((destructor))
*
* NOTE(review): a function marked __attribute__((destructor)) runs when the
* program exits or the shared object is unloaded, not when an individual
* thread terminates; a pthread_key_create destructor is the per-thread hook.
* Confirm which registration the implementation actually uses.
*/
void mid_mt_thread_exit(void);
/**
* mid_registry_lookup - Find segment containing ptr (for free() path)
*
* @param ptr Pointer to lookup
* @param out_block_size Output: block size in bytes if found
* @param out_class_idx Output: size class index if found
* @return true if found in Mid MT registry, false otherwise
*
* Used internally by hak_free_at() to identify Mid MT allocations
*
* NOTE(review): whether the output parameters may be NULL, and whether they
* are left untouched when false is returned, is not specified here.
*/
bool mid_registry_lookup(void* ptr, size_t* out_block_size, int* out_class_idx);
// ============================================================================
// Inline Helper Functions
// ============================================================================
/**
 * mid_size_to_class - Map a request size to its size-class index
 *
 * Picks the smallest class whose block size is >= size. There is no lower
 * bound check: every size up to 8192 bytes, including 0 and sizes below
 * MID_MIN_SIZE, yields MID_SIZE_CLASS_8K. Use mid_is_in_range() to reject
 * undersized requests.
 *
 * @param size Allocation size in bytes
 * @return MID_SIZE_CLASS_8K, MID_SIZE_CLASS_16K or MID_SIZE_CLASS_32K,
 *         or -1 when size exceeds 32768
 */
static inline int mid_size_to_class(size_t size) {
    int class_idx = -1;  /* stays -1 when size is above the largest class */

    if (size <= 8192) {
        class_idx = MID_SIZE_CLASS_8K;
    } else if (size <= 16384) {
        class_idx = MID_SIZE_CLASS_16K;
    } else if (size <= 32768) {
        class_idx = MID_SIZE_CLASS_32K;
    }
    return class_idx;
}
/**
 * mid_class_to_size - Look up the block size of a size class
 *
 * @param class_idx Size class index (0-2)
 * @return Block size in bytes (8192, 16384 or 32768), or 0 when class_idx
 *         is negative or not below MID_NUM_CLASSES
 */
static inline size_t mid_class_to_size(int class_idx) {
    static const size_t block_bytes[MID_NUM_CLASSES] = { 8192, 16384, 32768 };

    /* Reject indices outside the table before touching it. */
    if (class_idx < 0 || class_idx >= MID_NUM_CLASSES) {
        return 0;
    }
    return block_bytes[class_idx];
}
/**
 * mid_is_in_range - Test whether a size belongs to the Mid Range pool
 *
 * @param size Allocation size in bytes
 * @return true if MID_MIN_SIZE <= size <= MID_MAX_SIZE (8KB to 32KB,
 *         both bounds inclusive), false otherwise
 */
static inline bool mid_is_in_range(size_t size) {
    if (size < MID_MIN_SIZE) {
        return false;
    }
    return size <= MID_MAX_SIZE;
}
// ============================================================================
// Configuration (compile-time defaults; each macro below is guarded by #ifndef,
// so it can be pre-defined, e.g. with -D, before this header is included)
// NOTE(review): this banner previously said "can be overridden via environment
// variables"; no environment lookup is visible in this header - confirm
// whether a runtime override exists in the implementation.
// ============================================================================
// Default chunk size (64KB)
// NOTE(review): MID_CHUNK_SIZE, defined earlier in this header, is 4MB; confirm
// which of the two values the implementation uses.
#ifndef MID_DEFAULT_CHUNK_SIZE
#define MID_DEFAULT_CHUNK_SIZE (64 * 1024)
#endif
// Initial registry capacity (count of registry entries, not bytes)
#ifndef MID_REGISTRY_INITIAL_CAPACITY
#define MID_REGISTRY_INITIAL_CAPACITY 64
#endif
// Enable/disable statistics collection
// When non-zero, this header additionally declares MidStats, g_mid_stats and
// mid_mt_print_stats().
#ifndef MID_ENABLE_STATS
#define MID_ENABLE_STATS 0 // DISABLED for performance
#endif
// Enable/disable debug logging
// When non-zero, MID_LOG() prints to stderr; otherwise it expands to a no-op.
#ifndef MID_DEBUG
#define MID_DEBUG 0 // DISABLE for performance testing
#endif
// MID_LOG(fmt, ...) - printf-style debug trace.
// With MID_DEBUG non-zero it writes "[MID_MT] ", the formatted message and a
// newline to stderr. Otherwise it expands to ((void)0) and its arguments are
// not evaluated, so they must not carry required side effects.
// fmt must be a string literal because it is pasted between two literals.
// `##__VA_ARGS__` is a GNU extension that drops the preceding comma when no
// variadic arguments are supplied.
#if MID_DEBUG
#include <stdio.h>
#define MID_LOG(fmt, ...) fprintf(stderr, "[MID_MT] " fmt "\n", ##__VA_ARGS__)
#else
#define MID_LOG(fmt, ...) ((void)0)
#endif
// ============================================================================
// Statistics (Debug/Profiling)
// ============================================================================
#if MID_ENABLE_STATS
/**
 * MidStats - global counters for profiling
 *
 * Declared only when MID_ENABLE_STATS is non-zero.
 */
typedef struct MidStats {
    uint64_t total_allocs;      /* Total allocations */
    uint64_t total_frees;       /* Total frees */
    uint64_t total_refills;     /* Total chunk refills */
    uint64_t local_frees;       /* Frees performed by the owning thread */
    uint64_t remote_frees;      /* Frees performed by a different thread */
    uint64_t registry_lookups;  /* Registry lookups */
} MidStats;

/* The single global counter set; its definition lives outside this header. */
extern MidStats g_mid_stats;

/* Print the collected statistics (output destination is not specified here). */
void mid_mt_print_stats(void);
#endif /* MID_ENABLE_STATS */
#ifdef __cplusplus
}
#endif
#endif // HAKMEM_MID_MT_H