Phase v7-5b: C5+C6 multi-class expansion (+4.3% improvement)
- Add C5 (256B blocks) support alongside C6 (512B blocks) - Same segment shared between C5/C6 (page_meta.class_idx distinguishes) - SMALL_V7_CLASS_SUPPORTED() macro for class validation - Extend small_v7_block_size() for C5 (switch statement) A/B Result: C6-only v7 avg 7.64M ops/s → C5+C6 v7 avg 7.97M ops/s (+4.3%) Criteria: C6 protected ✅, C5 net positive ✅, TLS bloat none ✅ ENV: HAKMEM_SMALL_HEAP_V7_CLASSES=0x60 (bit5+bit6) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -102,17 +102,18 @@ typedef struct SmallPageStatsV7 {
|
||||
void small_cold_v7_publish_stats(const SmallPageStatsV7* stats);
|
||||
|
||||
// ============================================================================
|
||||
// Block Size Lookup (C6-only for v7-2)
|
||||
// Block Size Lookup (v7-5b: C5+C6)
|
||||
// ============================================================================
|
||||
|
||||
/// Get block size for class index
|
||||
/// v7-2: Only C6 (512B) is implemented
|
||||
/// v7-5b: C5 (256B) and C6 (512B) supported
|
||||
static inline size_t small_v7_block_size(uint32_t class_idx) {
|
||||
// v7-2: C6-only
|
||||
if (class_idx == SMALL_V7_C6_CLASS_IDX) {
|
||||
return SMALL_V7_C6_BLOCK_SIZE; // 512
|
||||
// v7-5b: C5+C6 multi-class
|
||||
switch (class_idx) {
|
||||
case SMALL_V7_C5_CLASS_IDX: return SMALL_V7_C5_BLOCK_SIZE; // 256
|
||||
case SMALL_V7_C6_CLASS_IDX: return SMALL_V7_C6_BLOCK_SIZE; // 512
|
||||
default: return 0; // Unsupported class
|
||||
}
|
||||
return 0; // Unsupported class
|
||||
}
|
||||
|
||||
#endif // HAKMEM_SMALLOBJECT_COLD_IFACE_V7_BOX_H
|
||||
|
||||
@ -1,14 +1,18 @@
|
||||
// smallobject_hotbox_v7_box.h - SmallObject HotBox v7 (Phase v7-5a: Hot path極限最適化)
|
||||
// smallobject_hotbox_v7_box.h - SmallObject HotBox v7 (Phase v7-5b: C5+C6 Multi-class)
|
||||
//
|
||||
// Role:
|
||||
// - SmallObject v7 fast path for alloc/free
|
||||
// - C6-only implementation (512B blocks, 64KiB pages, 2MiB segments)
|
||||
// - C5+C6 implementation (256B/512B blocks, 64KiB pages, 2MiB segments)
|
||||
// - Uses SmallHeapCtx_v7 + SmallSegment_v7 + ColdIface_v7
|
||||
//
|
||||
// v7-5a optimizations:
|
||||
// - Stats (alloc_count, free_count, live_current) removed from hot path
|
||||
// - Global atomic stats gated by ENV (HAKMEM_V7_HOT_STATS)
|
||||
// - Header write kept (required due to intrusive freelist overlapping block[0])
|
||||
//
|
||||
// v7-5b additions:
|
||||
// - C5 support (256B blocks) with minimal TLS overhead
|
||||
// - Same segment shared between C5 and C6 (page_meta.class_idx distinguishes)
|
||||
|
||||
#pragma once
|
||||
|
||||
@ -67,7 +71,7 @@ static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t
|
||||
// Alloc Fast Path
|
||||
// ============================================================================
|
||||
|
||||
// small_heap_alloc_fast_v7() - v7 alloc (C6-only, v7-5a: Hot path極限最適化)
|
||||
// small_heap_alloc_fast_v7() - v7 alloc (v7-5b: C5+C6 multi-class)
|
||||
//
|
||||
// Flow:
|
||||
// 1. Get TLS context
|
||||
@ -80,9 +84,11 @@ static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t
|
||||
// - Per-page stats (alloc_count, live_current) removed from hot path
|
||||
// - Global atomic stats gated by ENV (HAKMEM_V7_HOT_STATS)
|
||||
//
|
||||
// v7-5b: C5+C6 support (same code path, different block sizes)
|
||||
//
|
||||
static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
// v7-2: Only C6 is implemented
|
||||
if (unlikely(class_idx != SMALL_V7_C6_CLASS_IDX)) {
|
||||
// v7-5b: C5 or C6 supported
|
||||
if (unlikely(!SMALL_V7_CLASS_SUPPORTED(class_idx))) {
|
||||
return NULL; // Unsupported class -> front falls back
|
||||
}
|
||||
|
||||
@ -152,7 +158,7 @@ static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
// Free Fast Path
|
||||
// ============================================================================
|
||||
|
||||
// small_heap_free_fast_v7() - v7 free (C6-only, v7-5a: Hot path極限最適化)
|
||||
// small_heap_free_fast_v7() - v7 free (v7-5b: C5+C6 multi-class)
|
||||
//
|
||||
// Flow:
|
||||
// 1. TLS segment hint hit (skip RegionIdBox)
|
||||
@ -162,6 +168,8 @@ static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
|
||||
// - Stats (free_count, live_current) removed from hot path
|
||||
// - Global atomic stats gated by ENV
|
||||
//
|
||||
// v7-5b: C5+C6 support (page->class_idx determines actual class)
|
||||
//
|
||||
// @param ptr: USER pointer to free
|
||||
// @param class_idx_hint: Class index hint from front/header (may be ignored)
|
||||
// @return: true if handled by v7, false if not v7-managed (front should fallback)
|
||||
@ -194,8 +202,8 @@ static inline bool small_heap_free_fast_v7(void* ptr, uint8_t class_idx_hint) {
|
||||
|
||||
SmallPageMeta_v7* page = &seg->page_meta[page_idx];
|
||||
|
||||
// Validate page is in use and C6-only
|
||||
if (unlikely(page->capacity == 0 || page->class_idx != SMALL_V7_C6_CLASS_IDX)) {
|
||||
// v7-5b: Validate page is in use and C5 or C6
|
||||
if (unlikely(page->capacity == 0 || !SMALL_V7_CLASS_SUPPORTED(page->class_idx))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -245,8 +253,8 @@ regionid_fallback:
|
||||
|
||||
SmallPageMeta_v7* page = &seg->page_meta[page_idx];
|
||||
|
||||
// Validate page is in use and C6-only
|
||||
if (unlikely(page->capacity == 0 || page->class_idx != SMALL_V7_C6_CLASS_IDX)) {
|
||||
// v7-5b: Validate page is in use and C5 or C6
|
||||
if (unlikely(page->capacity == 0 || !SMALL_V7_CLASS_SUPPORTED(page->class_idx))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -22,12 +22,20 @@
|
||||
#define SMALL_SEGMENT_V7_MAGIC 0xC07E57u // C0(re) v7
|
||||
|
||||
// ============================================================================
|
||||
// C6 Class Configuration (v7-2: C6-only)
|
||||
// Class Configuration (v7-5b: C5+C6)
|
||||
// ============================================================================
|
||||
|
||||
// C6: 513-768B → 512B blocks
|
||||
#define SMALL_V7_C6_CLASS_IDX 6
|
||||
#define SMALL_V7_C6_BLOCK_SIZE 512
|
||||
|
||||
// C5: 257-512B → 256B blocks (v7-5b)
|
||||
#define SMALL_V7_C5_CLASS_IDX 5
|
||||
#define SMALL_V7_C5_BLOCK_SIZE 256
|
||||
|
||||
// v7-5b: Class support check macro
|
||||
#define SMALL_V7_CLASS_SUPPORTED(ci) ((ci) == 5 || (ci) == 6)
|
||||
|
||||
// ============================================================================
|
||||
// Page Index Calculation
|
||||
// ============================================================================
|
||||
|
||||
@ -1,12 +1,16 @@
|
||||
// smallobject_cold_iface_v7.c - SmallObject ColdIface v7 implementation (Phase v7-5a)
|
||||
// smallobject_cold_iface_v7.c - SmallObject ColdIface v7 implementation (Phase v7-5b)
|
||||
//
|
||||
// Purpose:
|
||||
// - Page refill: acquire page from segment, carve freelist
|
||||
// - Page retire: release empty page back to segment, publish stats
|
||||
//
|
||||
// v7-5a optimizations:
|
||||
// - Header written at carve time (not on hot path alloc)
|
||||
// - Header written at alloc time (not carve - freelist overlaps block[0])
|
||||
// - Stats collected at retire time (not on hot path)
|
||||
//
|
||||
// v7-5b additions:
|
||||
// - C5 support (256B blocks) alongside C6 (512B blocks)
|
||||
// - Same segment shared between C5/C6, page_meta.class_idx distinguishes
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -129,7 +133,7 @@ SmallPageMeta_v7* small_cold_v7_refill_page(SmallHeapCtx_v7* ctx, uint32_t class
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// v7-2: Only C6 is supported
|
||||
// v7-5b: C5 (256B) and C6 (512B) supported
|
||||
size_t block_size = small_v7_block_size(class_idx);
|
||||
if (unlikely(block_size == 0)) {
|
||||
return NULL; // Unsupported class
|
||||
|
||||
Reference in New Issue
Block a user