Files
hakmem/core/hakmem_smallmid.h
Moe Charm (CI) e7710982f8 Phase 2-Opt1: Force inline range check functions (neutral perf)
Changes:
- smallmid_is_in_range(): Add __attribute__((always_inline))
- mid_is_in_range(): Add __attribute__((always_inline))

Expected: Reduce function call overhead in Front Gate routing
Result: Neutral performance (~72M ops/s, same as Phase 1 final)

Analysis:
- Compiler was already inlining these simple functions with -O3 -flto
- 36M branches identified by perf are NOT from Front Gate routing
- Most branches are inside allocators (tiny_alloc, free, etc.)
- Front Gate optimization had minimal impact, as predicted

Next: SuperSlab size optimization (clear 3-5% benefit expected)

Files:
- core/hakmem_smallmid.h:116-119
- core/hakmem_mid_mt.h:228-231

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 18:14:31 +09:00

245 lines
7.6 KiB
C

/**
* hakmem_smallmid.h - Small-Mid Allocator Box (256B-4KB)
*
* Phase 17: Dedicated allocator layer for 256B-4KB range
* Goal: Bridge the gap between Tiny (0-255B) and Mid (8KB+)
*
* Design Principles:
* - Dedicated SuperSlab pool (completely separated from Tiny)
* - 5 size classes planned: 256B / 512B / 1KB / 2KB / 4KB (Phase 17-1 implements only the first 3: 256B / 512B / 1KB)
* - TLS freelist (same structure as Tiny TLS SLL)
* - Header-based fast free (Phase 7 technology)
* - ENV control: HAKMEM_SMALLMID_ENABLE=1 for A/B testing
*
* Target Performance:
* - Current: Tiny C6/C7 (512B/1KB) = 5.5M-5.9M ops/s (~6% of system malloc)
* - Goal: Small-Mid = 10M-20M ops/s (2-4x improvement)
*
* Architecture Boundaries:
* Tiny: 0-255B (C0-C5, existing design unchanged)
* Small-Mid: 256B-4KB (SM0-SM4, NEW!)
* Mid: 8KB-32KB (existing, page-unit efficient)
*
* Created: 2025-11-16 (Phase 17)
*/
#ifndef HAKMEM_SMALLMID_H
#define HAKMEM_SMALLMID_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Size Classes (Phase 17-1: Front Box Only, 3 classes)
// ============================================================================
// Number of size classes. Used in this header as the length of the per-class
// TLS arrays and of the lookup tables inside the inline helpers.
#define SMALLMID_NUM_CLASSES 3
// Size class indices (valid range: 0 .. SMALLMID_NUM_CLASSES - 1)
#define SMALLMID_CLASS_256B 0 // 256B blocks
#define SMALLMID_CLASS_512B 1 // 512B blocks
#define SMALLMID_CLASS_1KB 2 // 1KB blocks
// Inclusive size boundaries, as tested by smallmid_is_in_range()
#define SMALLMID_MIN_SIZE (256) // 256B (must be > Tiny max when enabled)
#define SMALLMID_MAX_SIZE (1024) // 1KB (reduced for Phase 17-1)
// ============================================================================
// TLS Freelist State
// ============================================================================
/**
 * TLS freelist state (per-thread, per-class)
 * - Same structure as Tiny TLS SLL
 * - Completely separated from Tiny to avoid competition
 *
 * Both arrays have one slot per size class (SMALLMID_NUM_CLASSES entries) and
 * are declared __thread, so each thread gets its own copy. They are only
 * declared here; the definitions live in another translation unit.
 *
 * g_smallmid_tls_head:  presumably the head of each class's freelist
 *                       -- TODO confirm against the implementation.
 * g_smallmid_tls_count: presumably the number of blocks currently cached per
 *                       class -- TODO confirm against the implementation.
 */
extern __thread void* g_smallmid_tls_head[SMALLMID_NUM_CLASSES];
extern __thread uint32_t g_smallmid_tls_count[SMALLMID_NUM_CLASSES];
// Capacity limits (per-class TLS cache)
// Phase 17-1: Conservative limits for Front Box
// These are the values smallmid_tls_capacity() returns for class indices
// 0 (256B), 1 (512B) and 2 (1KB) respectively.
#define SMALLMID_TLS_CAPACITY_256B 32
#define SMALLMID_TLS_CAPACITY_512B 24
#define SMALLMID_TLS_CAPACITY_1KB 16
// ============================================================================
// Size Class Mapping
// ============================================================================
/**
 * g_smallmid_class_sizes - Size class stride table
 * Phase 17-1: [SM0]=256, [SM1]=512, [SM2]=1024
 *
 * Has SMALLMID_NUM_CLASSES entries. Only declared here; the definition (and
 * therefore the actual values) lives in another translation unit.
 *
 * NOTE(review): smallmid_class_to_size() in this header carries its own local
 * copy of 256/512/1024 rather than reading this table, so the two must be
 * kept in sync by hand if they are meant to agree.
 */
extern const size_t g_smallmid_class_sizes[SMALLMID_NUM_CLASSES];
/**
 * smallmid_size_to_class - Convert size to size class index
 *
 * A request is placed in the smallest class whose block size can hold it:
 *   256        -> SMALLMID_CLASS_256B
 *   257..512   -> SMALLMID_CLASS_512B
 *   513..1024  -> SMALLMID_CLASS_1KB
 *
 * @param size Allocation size (256-1024)
 * @return Size class index (0-2), or -1 if out of range
 *         (size < 256 or size > 1024)
 */
static inline int smallmid_size_to_class(size_t size) {
    // Sizes below the Small-Mid minimum are out of range. Reject them instead
    // of silently filing them under the 256B class, so the result agrees with
    // the documented contract and with smallmid_is_in_range().
    if (size < 256) {
        return -1;
    }
    if (size <= 256) {
        return SMALLMID_CLASS_256B;
    }
    if (size <= 512) {
        return SMALLMID_CLASS_512B;
    }
    if (size <= 1024) {
        return SMALLMID_CLASS_1KB;
    }
    return -1; // Above the largest class
}
/**
 * smallmid_class_to_size - Look up the block size of a size class
 *
 * @param class_idx Size class index (0-2)
 * @return Block size in bytes (256/512/1024), or 0 when class_idx is not a
 *         valid class index
 */
static inline size_t smallmid_class_to_size(int class_idx) {
    static const size_t block_bytes[SMALLMID_NUM_CLASSES] = { 256, 512, 1024 };

    // Anything outside [0, SMALLMID_NUM_CLASSES) has no block size.
    if (class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES) {
        return 0;
    }
    return block_bytes[class_idx];
}
/**
 * smallmid_is_in_range - Tell whether a request size belongs to Small-Mid
 *
 * @param size Allocation size
 * @return true if SMALLMID_MIN_SIZE <= size <= SMALLMID_MAX_SIZE
 *         (256B to 1KB, both inclusive), false otherwise
 *
 * PERF_OPT: always_inline forces inlining so the hot path pays no function
 * call overhead for this check.
 */
__attribute__((always_inline))
static inline bool smallmid_is_in_range(size_t size) {
    // Too small for Small-Mid.
    if (size < SMALLMID_MIN_SIZE) {
        return false;
    }
    // In range only if it also fits under the upper bound.
    return size <= SMALLMID_MAX_SIZE;
}
/**
 * smallmid_tls_capacity - TLS cache capacity configured for a size class
 *
 * @param class_idx Size class index (0-2)
 * @return The SMALLMID_TLS_CAPACITY_* value for that class, or 0 when
 *         class_idx is not a valid class index
 */
static inline uint32_t smallmid_tls_capacity(int class_idx) {
    static const uint32_t per_class_limit[SMALLMID_NUM_CLASSES] = {
        SMALLMID_TLS_CAPACITY_256B,
        SMALLMID_TLS_CAPACITY_512B,
        SMALLMID_TLS_CAPACITY_1KB
    };

    // Anything outside [0, SMALLMID_NUM_CLASSES) has no cache.
    if (class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES) {
        return 0;
    }
    return per_class_limit[class_idx];
}
// ============================================================================
// API Functions
// ============================================================================
/**
 * smallmid_init - Initialize Small-Mid allocator
 *
 * Call once at startup (thread-safe, idempotent)
 * Sets up dedicated SuperSlab pool and TLS state
 *
 * NOTE(review): only the prototype is visible in this header. The
 * thread-safety and idempotence guarantees above are the stated contract;
 * confirm them against the implementation.
 */
void smallmid_init(void);
/**
 * smallmid_alloc - Allocate memory from Small-Mid pool
 *
 * @param size Allocation size. The Phase 17 design range is 256B-4KB
 *             (256 ≤ size ≤ 4096), but this header currently defines
 *             SMALLMID_MAX_SIZE as 1024, so smallmid_is_in_range() only
 *             accepts 256 ≤ size ≤ 1024.
 *             NOTE(review): confirm which bound the implementation enforces.
 * @return Allocated pointer with header, or NULL on failure
 *
 * Thread-safety: Lock-free (uses TLS)
 * Performance: O(1) fast path (TLS freelist pop/push)
 *
 * Fast path:
 * 1. Check TLS freelist (most common, ~3-5 instructions)
 * 2. Refill from dedicated SuperSlab if TLS empty
 * 3. Allocate new SuperSlab if pool exhausted (rare)
 *
 * Header layout (Phase 7 compatible):
 * [1 byte header: 0xa0 | class_idx][user data]
 *
 * NOTE(review): the thread-safety, fast-path and header-layout descriptions
 * are the stated design; the implementation is not visible in this header.
 */
void* smallmid_alloc(size_t size);
/**
 * smallmid_free - Free memory allocated by smallmid_alloc
 *
 * @param ptr Pointer to free (must be from smallmid_alloc)
 *            NOTE(review): handling of NULL or of pointers from other
 *            allocators is not visible from this header -- confirm before
 *            relying on it.
 *
 * Thread-safety: Lock-free if freeing to own thread's TLS
 * Performance: O(1) fast path (header-based class identification)
 *
 * Header-based fast free (Phase 7 technology):
 * - Read 1-byte header to get class_idx
 * - Push to TLS freelist (or remote drain if TLS full)
 */
void smallmid_free(void* ptr);
/**
 * smallmid_thread_exit - Cleanup thread-local state
 *
 * Called on thread exit to release TLS resources
 * Should be registered via pthread_key_create or __attribute__((destructor))
 *
 * NOTE(review): this header does not perform that registration itself;
 * whether it happens elsewhere is not visible here.
 */
void smallmid_thread_exit(void);
// ============================================================================
// ENV Control
// ============================================================================
/**
 * smallmid_is_enabled - Check if Small-Mid allocator is enabled
 *
 * ENV: HAKMEM_SMALLMID_ENABLE=1 to enable (default: 0 / disabled)
 * @return true if enabled, false otherwise
 *
 * NOTE(review): this header also defines a compile-time macro with the same
 * name (HAKMEM_SMALLMID_ENABLE, default 0). How the macro and the environment
 * variable interact is decided by the implementation, which is not visible
 * here.
 */
bool smallmid_is_enabled(void);
// ============================================================================
// Configuration
// ============================================================================
// Enable/disable Small-Mid allocator (ENV controlled, default OFF)
// This macro is a compile-time default of 0 that a build can override by
// defining HAKMEM_SMALLMID_ENABLE before this point (e.g. via -D).
// NOTE(review): the runtime ENV switch is exposed through
// smallmid_is_enabled(); whether that function consults this macro is not
// visible in this header.
#ifndef HAKMEM_SMALLMID_ENABLE
#define HAKMEM_SMALLMID_ENABLE 0
#endif
// Debug logging
// SMALLMID_DEBUG defaults to 0; define it to a non-zero value before this
// point to turn SMALLMID_LOG into real output.
#ifndef SMALLMID_DEBUG
#define SMALLMID_DEBUG 0 // DISABLE for performance testing
#endif
#if SMALLMID_DEBUG
#include <stdio.h>
// Writes "[SMALLMID] <message>\n" to stderr. fmt must be a string literal
// because it is concatenated with the prefix and newline at compile time.
// ##__VA_ARGS__ (GNU extension) drops the trailing comma when no arguments
// follow fmt.
#define SMALLMID_LOG(fmt, ...) fprintf(stderr, "[SMALLMID] " fmt "\n", ##__VA_ARGS__)
#else
// Logging disabled: expands to a no-op expression, so the arguments are
// neither evaluated nor type-checked.
#define SMALLMID_LOG(fmt, ...) ((void)0)
#endif
// ============================================================================
// Statistics (Debug/Profiling)
// ============================================================================
// Everything in this section exists only when HAKMEM_SMALLMID_STATS is defined.
#ifdef HAKMEM_SMALLMID_STATS
// Counters describing Small-Mid allocator activity.
typedef struct SmallMidStats {
uint64_t total_allocs; // Total allocations
uint64_t total_frees; // Total frees
uint64_t tls_hits; // TLS freelist hits
uint64_t tls_misses; // TLS freelist misses (refill)
uint64_t superslab_refills; // SuperSlab refill count
} SmallMidStats;
// Single global instance; declared here, defined in another translation unit.
// NOTE(review): the fields are plain uint64_t and the instance is not
// __thread; how updates from multiple threads are handled is not visible here.
extern SmallMidStats g_smallmid_stats;
// Prints the collected statistics (output format and destination are defined
// by the implementation, which is not visible in this header).
void smallmid_print_stats(void);
#endif
#ifdef __cplusplus
}
#endif
#endif // HAKMEM_SMALLMID_H