Phase v6-5: C5 extension for SmallObject Core v6
Extend the v6 architecture to support C5 (129-256B) in addition to C6 (257-512B):

- SmallHeapCtxV6: add tls_freelist_c5[32] and tls_count_c5 for the C5 TLS cache
- smallsegment_v6_box.h: add SMALL_V6_C5_CLASS_IDX (5) and SMALL_V6_C5_BLOCK_SIZE (256)
- smallobject_cold_iface_v6.c: generalize refill_page for both C5 (256 blocks/page) and C6 (128 blocks/page)
- smallobject_core_v6.c: add the C5 fast path (alloc/free) with TLS batching

Performance (v6 C5 enabled):

- C5-heavy: 41.0M ops/s (-23% vs v6 OFF 53.6M) - needs optimization
- Mixed: 36.2M ops/s (-18% vs v6 OFF 44.0M) - functional baseline

Note: the C5 route requires optimization in the next phase to match v6-3 performance.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@@ -32,6 +32,10 @@ struct SmallHeapCtxV6 {
     void* tls_freelist_c6[SMALL_V6_TLS_CAP];
     uint8_t tls_count_c6;
 
+    // C5 TLS freelist (Phase v6-5)
+    void* tls_freelist_c5[SMALL_V6_TLS_CAP];
+    uint8_t tls_count_c5;
+
     // TLS segment ownership (for fast check)
     uintptr_t tls_seg_base;
     uintptr_t tls_seg_end;
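The C5 fields mirror the C6 ones exactly, so each supported class gets its own fixed-capacity TLS stack. For orientation, the class boundaries from the commit message follow the usual power-of-two pattern: C5 covers 129-256B (block size 256 = 1 << 8) and C6 covers 257-512B (block size 512 = 1 << 9), i.e. block_size == 1 << (class_idx + 3). A minimal sketch of that mapping — the helper name is hypothetical, not part of the patch:

    #include <stddef.h>

    // Hypothetical helper, not in the patch: map a request size to the
    // v6 class index, using the ranges stated in the commit message
    // (C5: 129-256B -> idx 5, C6: 257-512B -> idx 6).
    static inline int small_v6_class_for_size(size_t size) {
        if (size >= 129 && size <= 256) return 5;  // SMALL_V6_C5_CLASS_IDX
        if (size >= 257 && size <= 512) return 6;  // SMALL_V6_C6_CLASS_IDX
        return -1;  // outside the v6 small-object range
    }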
smallsegment_v6_box.h:

@@ -16,6 +16,10 @@
 #define SMALL_V6_C6_CLASS_IDX 6
 #define SMALL_V6_C6_BLOCK_SIZE 512
 
+// C5 configuration (Phase v6-5)
+#define SMALL_V6_C5_CLASS_IDX 5
+#define SMALL_V6_C5_BLOCK_SIZE 256
+
 // Page index calculation macro (requires 'seg' variable in scope)
 #define SMALL_V6_PAGE_IDX(seg, addr) (((uintptr_t)(addr) - (seg)->base) >> SMALL_PAGE_V6_SHIFT)
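The capacities quoted later in the diff (128 blocks for C6, 256 for C5) pin down the page size, since 128 × 512 = 256 × 256 = 65536: SMALL_V6_PAGE_SIZE is evidently 64 KiB. A compile-time sanity check, assuming that derived value:

    // Sketch: compile-time checks, assuming SMALL_PAGE_V6_SIZE is 64 KiB
    // (derived from the capacities in the diff: 128 * 512 == 256 * 256).
    #define SMALL_PAGE_V6_SIZE      (64 * 1024)
    #define SMALL_V6_C5_BLOCK_SIZE  256
    #define SMALL_V6_C6_BLOCK_SIZE  512

    _Static_assert(SMALL_PAGE_V6_SIZE / SMALL_V6_C6_BLOCK_SIZE == 128,
                   "C6 pages must hold 128 blocks");
    _Static_assert(SMALL_PAGE_V6_SIZE / SMALL_V6_C5_BLOCK_SIZE == 256,
                   "C5 pages must hold 256 blocks");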
smallobject_cold_iface_v6.c:

@@ -11,11 +11,16 @@
 #define unlikely(x) __builtin_expect(!!(x), 0)
 #endif
 
-// Refill page for given class (C6-only in v6-3)
+// Refill page for given class (C6 and C5 in v6-5)
 SmallPageMetaV6* small_cold_v6_refill_page(uint32_t class_idx) {
-    // v6-3: C6-only implementation
-    if (unlikely(class_idx != SMALL_V6_C6_CLASS_IDX)) {
-        return NULL;
+    // v6-5: Support C5 and C6
+    size_t block_size;
+    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
+        block_size = SMALL_V6_C6_BLOCK_SIZE;   // 512
+    } else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
+        block_size = SMALL_V6_C5_BLOCK_SIZE;   // 256
+    } else {
+        return NULL;  // Unsupported class
     }
 
     // Get or acquire TLS segment
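The if/else chain is fine for two classes, but if later phases extend the v6 route to more classes, a lookup table would keep this dispatch branch-free. A sketch of that alternative — the table values follow the defines above, but this is not what the patch does:

    #include <stddef.h>
    #include <stdint.h>

    // Sketch of a table-driven alternative to the if/else chain: index by
    // class and get the block size, with 0 marking unsupported classes.
    static const uint16_t small_v6_block_size_by_class[8] = {
        [5] = 256,   // SMALL_V6_C5_BLOCK_SIZE
        [6] = 512,   // SMALL_V6_C6_BLOCK_SIZE
    };

    static inline size_t small_v6_block_size(uint32_t class_idx) {
        if (class_idx >= 8) return 0;
        return small_v6_block_size_by_class[class_idx];
    }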
@@ -37,9 +42,9 @@ SmallPageMetaV6* small_cold_v6_refill_page(uint32_t class_idx) {
         return NULL; // All pages in use
     }
 
-    // Initialize page metadata for C6
+    // Initialize page metadata
     page->class_idx = (uint8_t)class_idx;
-    page->capacity = SMALL_PAGE_V6_SIZE / SMALL_V6_C6_BLOCK_SIZE; // 128 blocks
+    page->capacity = SMALL_PAGE_V6_SIZE / block_size; // C6: 128, C5: 256
     page->used = 0;
     page->flags = 0;
@@ -51,7 +56,7 @@ SmallPageMetaV6* small_cold_v6_refill_page(uint32_t class_idx) {
     // Build intrusive freelist (last to first for cache locality)
     void* freelist = NULL;
    for (int i = (int)page->capacity - 1; i >= 0; i--) {
-        uint8_t* block = base + ((size_t)i * SMALL_V6_C6_BLOCK_SIZE);
+        uint8_t* block = base + ((size_t)i * block_size);
 
         // Build freelist using BASE pointers
         void* next = freelist;
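The loop above threads a next-pointer through the first word of each free block, so the freelist costs no side storage, and building it last-to-first means pops return ascending addresses. A self-contained toy version of the same technique, assuming only that blocks are at least sizeof(void*) wide:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    // Toy version of the intrusive freelist built in the hunk above: each
    // free block's first word stores the next free block, so the list
    // needs no extra memory. Built last-to-first so pops return the
    // lowest addresses first (cache locality).
    int main(void) {
        enum { BLOCK_SIZE = 256, NBLOCKS = 8 };
        uint8_t* base = malloc((size_t)BLOCK_SIZE * NBLOCKS);
        if (!base) return 1;

        void* freelist = NULL;
        for (int i = NBLOCKS - 1; i >= 0; i--) {
            uint8_t* block = base + (size_t)i * BLOCK_SIZE;
            *(void**)block = freelist;  // link to previous head
            freelist = block;           // new head
        }

        // Pop all blocks: they come back in ascending address order.
        while (freelist) {
            void* blk = freelist;
            freelist = *(void**)blk;
            printf("popped offset %td\n", (uint8_t*)blk - base);
        }
        free(base);
        return 0;
    }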
smallobject_core_v6.c:

@@ -81,17 +81,32 @@ void* small_alloc_fast_v6(size_t size,
 
     uint8_t route = snap->route_kind[class_idx];
 
-    // Check if this is CORE_V6 route and C6 class
-    if (route != TINY_ROUTE_SMALL_HEAP_V6 || class_idx != SMALL_V6_C6_CLASS_IDX) {
+    // v6-5: Support C6 and C5 classes
+    if (route != TINY_ROUTE_SMALL_HEAP_V6) {
         return hak_pool_try_alloc(size, 0);
     }
 
+    // C6 fast path
+    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
     // Fast path: TLS freelist hit
     if (likely(ctx->tls_count_c6 > 0)) {
         void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
         // v6-3: Header already written during refill, just return USER pointer
         return SMALL_V6_USER_FROM_BASE(blk);
     }
+    }
+    // C5 fast path (Phase v6-5)
+    else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
+        // Fast path: TLS freelist hit
+        if (likely(ctx->tls_count_c5 > 0)) {
+            void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+    }
+    else {
+        // Unsupported class for v6
+        return hak_pool_try_alloc(size, 0);
+    }
 
     // Slow path: refill TLS with multiple blocks (batching)
     SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
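SMALL_V6_USER_FROM_BASE and the header-byte writes in the refill path suggest each block begins with a small class header, with the user pointer just past it. The exact layout is not shown in this diff, so the macros below are an assumption for illustration only; the real header offset may be larger to keep user data aligned:

    #include <stdint.h>

    // Assumed layout (not confirmed by this diff): each block starts with
    // a small header encoding the class, and the user pointer begins right
    // after it. The 1-byte offset is an illustrative assumption.
    #define SMALL_V6_HDR_SIZE 1
    #define SMALL_V6_USER_FROM_BASE(base) ((void*)((uint8_t*)(base) + SMALL_V6_HDR_SIZE))
    #define SMALL_V6_BASE_FROM_USER(user) ((void*)((uint8_t*)(user) - SMALL_V6_HDR_SIZE))

Under that layout, the free path can recover the class from a user pointer with a single byte load, which is what lets small_free_fast_v6 below branch on class_idx without a size lookup.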
@@ -99,21 +114,20 @@ void* small_alloc_fast_v6(size_t size,
         return hak_pool_try_alloc(size, 0); // Safety fallback
     }
 
-    // v6-3: Batch refill - fill TLS with as many blocks as possible
-    // AND write headers in batch (not per-alloc)
+    // v6-5: Batch refill - support C6 and C5
     uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
-    int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6; // Currently 0, so max_fill = 32
+
+    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
+        // C6 refill path
+        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
     int filled = 0;
 
     // Fill TLS (leave room for 1 to return)
     while (page->free_list && filled < max_fill - 1) {
         void* blk = page->free_list;
         page->free_list = *(void**)blk;
 
-        // v6-3: Write header NOW (after pop, before storing in TLS)
         ((uint8_t*)blk)[0] = header_byte;
-        ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk; // Store BASE
+        ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
         filled++;
     }
     page->used += filled;
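The "leave room for 1 to return" rule is what makes the batching pay off. A worked example of the arithmetic, assuming SMALL_V6_TLS_CAP is 32 (per the tls_freelist_c5[32] in the commit message) and an empty TLS cache:

    #include <assert.h>

    // Worked example of the refill arithmetic, assuming SMALL_V6_TLS_CAP
    // == 32 and an empty TLS cache at entry.
    int main(void) {
        int tls_cap = 32, tls_count = 0;
        int max_fill = tls_cap - tls_count;  // 32
        int cached   = max_fill - 1;         // loop stops at 31 cached blocks
        int returned = 1;                    // one block handed to the caller
        assert(cached + returned == 32);     // one cold call serves 32 allocs
        return 0;
    }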
@@ -123,8 +137,6 @@ void* small_alloc_fast_v6(size_t size,
         void* blk = page->free_list;
         page->free_list = *(void**)blk;
         page->used++;
 
-        // v6-3: Write header and return USER pointer
         ((uint8_t*)blk)[0] = header_byte;
         return SMALL_V6_USER_FROM_BASE(blk);
     }
@@ -132,9 +144,39 @@ void* small_alloc_fast_v6(size_t size,
     // If we filled TLS but no more blocks, pop from TLS
     if (ctx->tls_count_c6 > 0) {
         void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
-        // Header already written in the loop above
         return SMALL_V6_USER_FROM_BASE(blk);
     }
+    }
+    else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
+        // C5 refill path (Phase v6-5)
+        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
+        int filled = 0;
+
+        // Fill TLS (leave room for 1 to return)
+        while (page->free_list && filled < max_fill - 1) {
+            void* blk = page->free_list;
+            page->free_list = *(void**)blk;
+            ((uint8_t*)blk)[0] = header_byte;
+            ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
+            filled++;
+        }
+        page->used += filled;
+
+        // Pop one more to return to caller
+        if (page->free_list) {
+            void* blk = page->free_list;
+            page->free_list = *(void**)blk;
+            page->used++;
+            ((uint8_t*)blk)[0] = header_byte;
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+
+        // If we filled TLS but no more blocks, pop from TLS
+        if (ctx->tls_count_c5 > 0) {
+            void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
+            return SMALL_V6_USER_FROM_BASE(blk);
+        }
+    }
 
     // Should not reach here
     return hak_pool_try_alloc(size, 0);
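The C5 refill branch is a line-for-line copy of the C6 one with the array and counter swapped, which is likely part of why the C5 route trails v6-3 (see the commit note about the next phase). One way the follow-up could fold the two paths together is to parameterize the shared body on the class's TLS stack. A refactor sketch with hypothetical names, assuming the types and macros from the files above are in scope:

    #include <stdint.h>

    // Hypothetical refactor (not in the patch): one refill body shared by
    // C5 and C6, parameterized on the class's TLS stack and counter.
    typedef struct {
        void**   slots;  // ctx->tls_freelist_c5 or ctx->tls_freelist_c6
        uint8_t* count;  // &ctx->tls_count_c5 or &ctx->tls_count_c6
    } SmallTlsStackV6;

    static void* refill_common(SmallPageMetaV6* page, SmallTlsStackV6 tls,
                               int tls_cap, uint8_t header_byte) {
        int filled = 0;
        // Fill TLS, leaving room for one block to return directly.
        // (*tls.count < tls_cap - 1 is equivalent to filled < max_fill - 1.)
        while (page->free_list && *tls.count < tls_cap - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            tls.slots[(*tls.count)++] = blk;
            filled++;
        }
        page->used += filled;

        if (page->free_list) {              // pop one more for the caller
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        if (*tls.count > 0)                 // fall back to the TLS cache
            return SMALL_V6_USER_FROM_BASE(tls.slots[--*tls.count]);
        return NULL;
    }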
@@ -161,8 +203,8 @@ void small_free_fast_v6(void* ptr,
 
     uint8_t route = snap->route_kind[class_idx];
 
-    // Check if this is CORE_V6 route and C6 class
-    if (route != TINY_ROUTE_SMALL_HEAP_V6 || class_idx != SMALL_V6_C6_CLASS_IDX) {
+    // v6-5: Check if this is CORE_V6 route
+    if (route != TINY_ROUTE_SMALL_HEAP_V6) {
         hak_pool_free(ptr, 0, 0);
         return;
     }
@@ -172,8 +214,14 @@ void small_free_fast_v6(void* ptr,
 
     // Fast path: TLS segment ownership + TLS push
     if (likely(small_tls_owns_ptr_v6(ctx, ptr))) {
-        if (ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
-            ctx->tls_freelist_c6[ctx->tls_count_c6++] = base; // Store BASE
+        // C6 TLS push
+        if (class_idx == SMALL_V6_C6_CLASS_IDX && ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
+            ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
+            return;
+        }
+        // C5 TLS push (Phase v6-5)
+        if (class_idx == SMALL_V6_C5_CLASS_IDX && ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
+            ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
             return;
         }
     }
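small_tls_owns_ptr_v6 itself is not shown in this diff, but the tls_seg_base/tls_seg_end fields in the struct above are commented "for fast check", which suggests a plain range test. A sketch under that assumption, with the struct assumed in scope:

    #include <stdbool.h>
    #include <stdint.h>

    // Assumed implementation of the ownership check (not shown in this
    // diff): a pointer belongs to the calling thread's segment iff it
    // falls in [tls_seg_base, tls_seg_end). Two compares, no locking.
    static inline bool small_tls_owns_ptr_v6_sketch(const SmallHeapCtxV6* ctx,
                                                    const void* ptr) {
        uintptr_t p = (uintptr_t)ptr;
        return p >= ctx->tls_seg_base && p < ctx->tls_seg_end;
    }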