Phase v6-5: C5 extension for SmallObject Core v6

Extend v6 architecture to support C5 (129-256B) in addition to C6 (257-512B):

- SmallHeapCtxV6: Add tls_freelist_c5[32] and tls_count_c5 for the C5 TLS cache
- smallsegment_v6_box.h: Add SMALL_V6_C5_CLASS_IDX (5) and C5_BLOCK_SIZE (256); see
  the sketch after this list
- smallobject_cold_iface_v6.c: Generalize refill_page for both C5 (256 blocks/page)
  and C6 (128 blocks/page)
- smallobject_core_v6.c: Add C5 fast path (alloc/free) with TLS batching
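
As a reading aid, here is a minimal sketch of the new C5 state described in the list
above. Only the C5 names and values (tls_freelist_c5[32], tls_count_c5,
SMALL_V6_C5_CLASS_IDX = 5, C5_BLOCK_SIZE = 256) and the 32-entry TLS cap implied by
the diff come from this commit; member order, exact integer types, the C6 constants,
and the 64KiB page-payload figure are assumptions for illustration only:

    /* Size-class constants. C5 values are from this commit; the C6 values and
     * macro names shown here are assumptions. */
    #define SMALL_V6_C5_CLASS_IDX  5      /* C5: 129-256B blocks */
    #define C5_BLOCK_SIZE          256
    #define SMALL_V6_C6_CLASS_IDX  6      /* assumed value; C6: 257-512B blocks */
    #define C6_BLOCK_SIZE          512    /* assumed name */
    #define SMALL_V6_TLS_CAP       32     /* implied by tls_freelist_c5[32] */

    /* Blocks per page, assuming a 64KiB usable page payload:
     *   C5: 65536 / 256 = 256 blocks/page
     *   C6: 65536 / 512 = 128 blocks/page
     * which matches the refill_page figures above. */

    typedef struct SmallHeapCtxV6 {
        /* ... existing v6 members (segment/page bookkeeping) elided ... */
        void* tls_freelist_c6[SMALL_V6_TLS_CAP];  /* existing C6 TLS cache */
        int   tls_count_c6;
        void* tls_freelist_c5[SMALL_V6_TLS_CAP];  /* Phase v6-5: C5 TLS cache */
        int   tls_count_c5;
    } SmallHeapCtxV6;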

Performance (v6 C5 enabled):
- C5-heavy: 41.0M ops/s (-23% vs 53.6M with v6 OFF) - needs optimization
- Mixed: 36.2M ops/s (-18% vs 44.0M with v6 OFF) - functional baseline
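
(For reference, the deltas: 41.0 / 53.6 ≈ 0.765, roughly a 23% drop; 36.2 / 44.0 ≈ 0.82, roughly an 18% drop.)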

Note: the C5 route requires optimization in the next phase to match v6-3 performance.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Moe Charm (CI) committed on 2025-12-11 15:50:14 +09:00
parent c60199182e
commit 1e04debb1b
4 changed files with 106 additions and 45 deletions

File: smallobject_core_v6.c

@@ -81,16 +81,31 @@ void* small_alloc_fast_v6(size_t size,
uint8_t route = snap->route_kind[class_idx];
// Check if this is CORE_V6 route and C6 class
if (route != TINY_ROUTE_SMALL_HEAP_V6 || class_idx != SMALL_V6_C6_CLASS_IDX) {
// v6-5: Support C6 and C5 classes
if (route != TINY_ROUTE_SMALL_HEAP_V6) {
return hak_pool_try_alloc(size, 0);
}
// Fast path: TLS freelist hit
if (likely(ctx->tls_count_c6 > 0)) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
// v6-3: Header already written during refill, just return USER pointer
return SMALL_V6_USER_FROM_BASE(blk);
// C6 fast path
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
// Fast path: TLS freelist hit
if (likely(ctx->tls_count_c6 > 0)) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
// v6-3: Header already written during refill, just return USER pointer
return SMALL_V6_USER_FROM_BASE(blk);
}
}
// C5 fast path (Phase v6-5)
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
// Fast path: TLS freelist hit
if (likely(ctx->tls_count_c5 > 0)) {
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
return SMALL_V6_USER_FROM_BASE(blk);
}
}
else {
// Unsupported class for v6
return hak_pool_try_alloc(size, 0);
}
// Slow path: refill TLS with multiple blocks (batching)
@@ -99,41 +114,68 @@
return hak_pool_try_alloc(size, 0); // Safety fallback
}
// v6-3: Batch refill - fill TLS with as many blocks as possible
// AND write headers in batch (not per-alloc)
// v6-5: Batch refill - support C6 and C5
uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6; // Currently 0, so max_fill = 32
int filled = 0;
// Fill TLS (leave room for 1 to return)
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
// C6 refill path
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
int filled = 0;
// v6-3: Write header NOW (after pop, before storing in TLS)
((uint8_t*)blk)[0] = header_byte;
// Fill TLS (leave room for 1 to return)
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
((uint8_t*)blk)[0] = header_byte;
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
filled++;
}
page->used += filled;
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk; // Store BASE
filled++;
// Pop one more to return to caller
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
// If we filled TLS but no more blocks, pop from TLS
if (ctx->tls_count_c6 > 0) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
return SMALL_V6_USER_FROM_BASE(blk);
}
}
page->used += filled;
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
// C5 refill path (Phase v6-5)
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
int filled = 0;
// Pop one more to return to caller
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
// Fill TLS (leave room for 1 to return)
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
((uint8_t*)blk)[0] = header_byte;
ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
filled++;
}
page->used += filled;
// v6-3: Write header and return USER pointer
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
// Pop one more to return to caller
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
// If we filled TLS but no more blocks, pop from TLS
if (ctx->tls_count_c6 > 0) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
// Header already written in the loop above
return SMALL_V6_USER_FROM_BASE(blk);
// If we filled TLS but no more blocks, pop from TLS
if (ctx->tls_count_c5 > 0) {
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
return SMALL_V6_USER_FROM_BASE(blk);
}
}
// Should not reach here
@@ -161,8 +203,8 @@ void small_free_fast_v6(void* ptr,
uint8_t route = snap->route_kind[class_idx];
// Check if this is CORE_V6 route and C6 class
if (route != TINY_ROUTE_SMALL_HEAP_V6 || class_idx != SMALL_V6_C6_CLASS_IDX) {
// v6-5: Check if this is CORE_V6 route
if (route != TINY_ROUTE_SMALL_HEAP_V6) {
hak_pool_free(ptr, 0, 0);
return;
}
@@ -172,8 +214,14 @@
// Fast path: TLS segment ownership + TLS push
if (likely(small_tls_owns_ptr_v6(ctx, ptr))) {
if (ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
ctx->tls_freelist_c6[ctx->tls_count_c6++] = base; // Store BASE
// C6 TLS push
if (class_idx == SMALL_V6_C6_CLASS_IDX && ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
return;
}
// C5 TLS push (Phase v6-5)
if (class_idx == SMALL_V6_C5_CLASS_IDX && ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
return;
}
}
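
A note on the BASE/USER pointer convention both fast paths above rely on: refill
writes a class-tag byte at offset 0 of each block BASE, the allocation path hands out
a USER pointer derived from BASE, and the free path converts back and pushes BASE
onto the TLS freelist. The sketch below shows one way such macros could fit together;
the header size, the tag encoding, and the SMALL_V6_BASE_FROM_USER name are
assumptions for illustration, not taken from the repository.

    #include <stdint.h>

    /* Assumed: a small header reserved at the block base; the real code may
     * reserve more than one byte to keep USER pointers aligned. */
    #define SMALL_V6_HEADER_SIZE 1

    /* Used in the diff (body assumed here): tag byte encoding the size class,
     * written to ((uint8_t*)blk)[0] during refill. */
    #define SMALL_V6_HEADER_FROM_CLASS(class_idx)  ((uint8_t)(class_idx))

    /* Used in the diff (body assumed here): allocation returns USER past the header. */
    #define SMALL_V6_USER_FROM_BASE(base) \
        ((void*)((uint8_t*)(base) + SMALL_V6_HEADER_SIZE))

    /* Assumed inverse for the free path, which stores BASE in the TLS freelist. */
    #define SMALL_V6_BASE_FROM_USER(user) \
        ((void*)((uint8_t*)(user) - SMALL_V6_HEADER_SIZE))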