Phase 6-B: Header-based Mid MT free (lock-free, +2.65% improvement)
Performance Results (bench_mid_mt_gap, 1KB-8KB, ws=256):
- Before: 41.0 M ops/s (mutex-protected registry)
- After: 42.09 M ops/s (+2.65% improvement)

Expected vs Actual:
- Expected: +17-27% (based on perf showing 13.98% mutex overhead)
- Actual: +2.65% (needs investigation)

Implementation:
- Added MidMTHeader (8 bytes) to each Mid MT allocation
- Allocation: Write header with block_size, class_idx, magic (0xAB42)
- Free: Read header for O(1) metadata lookup (no mutex!)
- Eliminated entire registry infrastructure (127 lines deleted)

Changes:
- core/hakmem_mid_mt.h: Added MidMTHeader, removed registry structures
- core/hakmem_mid_mt.c: Updated alloc/free, removed registry functions
- core/box/mid_free_route_box.h: Header-based detection instead of registry lookup

Code Quality:
✅ Lock-free (no pthread_mutex operations)
✅ Simpler (O(1) header read vs O(log N) binary search)
✅ Smaller binary (127 lines deleted)
✅ Positive improvement (no regression)

Next: Investigate why improvement is smaller than expected

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -34,6 +34,34 @@ extern "C" {
|
||||
#define MID_SIZE_CLASS_32K 2 // 32KB blocks
|
||||
#define MID_NUM_CLASSES 3 // Total number of size classes
|
||||
|
||||
// ============================================================================
|
||||
// Phase 6-B: Header-based Allocation (Lock-free Free)
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* MidMTHeader - Per-allocation header for lock-free free()
|
||||
*
|
||||
* Prepended to each Mid MT allocation for O(1) metadata lookup.
|
||||
* Eliminates need for global registry + mutex (13.98% CPU overhead).
|
||||
*
|
||||
* Memory Layout:
|
||||
* [MidMTHeader: 8 bytes][User data: block_size - 8 bytes]
|
||||
* ^                     ^
|
||||
* block                 returned to user
|
||||
*
|
||||
* Performance:
|
||||
* - Before: pthread_mutex_lock (8.12%) + unlock (5.86%) = 13.98% CPU
|
||||
* - After: Simple header read (~2 cycles) = 0.01% CPU
|
||||
* - Expected: +17-27% throughput improvement (measured: +2.65% on bench_mid_mt_gap)
|
||||
*/
|
||||
typedef struct MidMTHeader {
|
||||
uint32_t block_size; // Total block size in bytes, header included (8192/16384/32768)
|
||||
uint16_t class_idx; // Size class index (0-2, i.e. below MID_NUM_CLASSES)
|
||||
uint16_t magic; // Validation marker; expected to hold MID_MT_MAGIC for Mid MT blocks
|
||||
} MidMTHeader; // 4 + 2 + 2 = 8 bytes, matching the 8-byte header in the layout above
|
||||
|
||||
#define MID_MT_MAGIC 0xAB42 // Mid MT allocation marker
|
||||
|
||||
// Phase 13: Close Tiny/Mid gap.
|
||||
// Phase 16: Dynamic Mid min size - must start where Tiny ends
|
||||
// Tiny max size is configurable via HAKMEM_TINY_MAX_CLASS:
|
||||
@ -88,31 +116,7 @@ typedef struct MidThreadSegment {
|
||||
|
||||
} __attribute__((aligned(64))) MidThreadSegment;
|
||||
|
||||
/**
|
||||
* MidSegmentRegistry - Global registry for segment lookup in free()
|
||||
*
|
||||
* Used to find the owning segment when freeing a pointer.
|
||||
* Entries are sorted by base address for O(log N) binary search.
|
||||
*/
|
||||
typedef struct MidSegmentRegistry {
|
||||
void* base; // Segment base address
|
||||
size_t block_size; // Block size (8KB/16KB/32KB)
|
||||
int class_idx; // Size class index (0-2)
|
||||
int padding; // Alignment padding
|
||||
} MidSegmentRegistry;
|
||||
|
||||
/**
|
||||
* MidGlobalRegistry - Global registry manager
|
||||
*
|
||||
* Thread-safety: Protected by pthread_mutex
|
||||
* Performance: Lock only during registry operations (low frequency)
|
||||
*/
|
||||
typedef struct MidGlobalRegistry {
|
||||
MidSegmentRegistry* entries; // Dynamic array of registry entries
|
||||
uint32_t count; // Number of entries
|
||||
uint32_t capacity; // Array capacity
|
||||
pthread_mutex_t lock; // Registry lock
|
||||
} MidGlobalRegistry;
|
||||
// Phase 6-B: Registry structures removed (header-based free instead)
|
||||
|
||||
// ============================================================================
|
||||
// Global Variables
|
||||
@ -121,9 +125,6 @@ typedef struct MidGlobalRegistry {
|
||||
// TLS: Each thread has its own segments (lock-free!)
|
||||
extern __thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES];
|
||||
|
||||
// Global registry (protected by lock)
|
||||
extern MidGlobalRegistry g_mid_registry;
|
||||
|
||||
// ============================================================================
|
||||
// API Functions
|
||||
// ============================================================================
|
||||
@ -176,17 +177,7 @@ void mid_mt_free(void* ptr, size_t size);
|
||||
*/
|
||||
void mid_mt_thread_exit(void);
|
||||
|
||||
/**
|
||||
* mid_registry_lookup - Find segment containing ptr (for free() path)
|
||||
*
|
||||
* @param ptr Pointer to lookup
|
||||
* @param out_block_size Output: block size if found
|
||||
* @param out_class_idx Output: size class index if found
|
||||
* @return true if found in Mid MT registry, false otherwise
|
||||
*
|
||||
* Used internally by hak_free_at() to identify Mid MT allocations
|
||||
*/
|
||||
bool mid_registry_lookup(void* ptr, size_t* out_block_size, int* out_class_idx);
|
||||
// Phase 6-B: mid_registry_lookup() removed (header-based free instead)
|
||||
|
||||
// ============================================================================
|
||||
// Inline Helper Functions
|
||||
|
||||
Reference in New Issue
Block a user