Phase v4-mid-2, v4-mid-3, v4-mid-5: SmallObject HotBox v4 implementation and docs update

Implementation:
- SmallObject HotBox v4 (core/smallobject_hotbox_v4.c) now fully implements C6-only allocation and free paths, including current/partial page management and freelist operations.
- The Cold Iface (tiny_heap-based) for page refill/retire is integrated.
- Stats instrumentation (v4-mid-5) added to small_heap_alloc_fast_v4 and small_heap_free_fast_v4, with a new header file core/box/smallobject_hotbox_v4_stats_box.h and an atexit dump function; see the sketch after this list.
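
A minimal sketch of what the new stats header plausibly declares, with counter names inferred from the dump code in the diff below; the actual layout and helper set in core/box/smallobject_hotbox_v4_stats_box.h are assumptions:

#include <stdatomic.h>
#include <stdint.h>

typedef struct {
    _Atomic uint64_t alloc_calls;          /* entries into small_heap_alloc_fast_v4 */
    _Atomic uint64_t alloc_success;        /* blocks served from a v4 freelist */
    _Atomic uint64_t alloc_null_page;      /* slow path produced no usable page */
    _Atomic uint64_t alloc_fallback_pool;  /* NULL returned; pool v1 serves the request */
    _Atomic uint64_t free_calls;           /* entries into small_heap_free_fast_v4 */
    _Atomic uint64_t free_page_found;      /* ptr located in current/partial/full lists */
    _Atomic uint64_t free_page_not_found;  /* ptr not owned by v4; pool v1 handles it */
} small_heap_v4_class_stats_t;

extern small_heap_v4_class_stats_t g_small_heap_v4_stats[8];  /* one slot per size class */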

Updates:
- CURRENT_TASK.md has been condensed and updated with summaries of Phase v4-mid-2 (C6-only v4), Phase v4-mid-3 (C5-only v4 pilot), and the stats implementation (v4-mid-5).
- docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md updated with A/B results and conclusions for C6-only and C5-only v4 implementations.
- The previous CURRENT_TASK.md content has been archived to CURRENT_TASK_ARCHIVE_20251210.md.
Author: Moe Charm (CI)
Date:   2025-12-11 01:01:15 +09:00
Parent: 3b4449d773
Commit: dd974b49c5

5 changed files with 1569 additions and 1182 deletions

core/smallobject_hotbox_v4.c

@@ -7,6 +7,7 @@
 #include "box/smallobject_hotbox_v4_box.h"
 #include "box/smallobject_hotbox_v4_env_box.h"
+#include "box/smallobject_hotbox_v4_stats_box.h"
 #include "box/smallobject_cold_iface_v4.h"
 #include "box/smallobject_hotbox_v3_env_box.h"
 #include "box/tiny_heap_box.h"
@@ -16,6 +17,11 @@
 #include "box/tiny_geometry_box.h"
 #include "tiny_region_id.h"
+
+// ============================================================================
+// Stats storage (Phase v4-mid-5)
+// ============================================================================
+small_heap_v4_class_stats_t g_small_heap_v4_stats[8];
 // ============================================================================
 // v4 Segment Configuration (Phase v4-mid-0+)
 // ============================================================================
@@ -235,9 +241,16 @@ static small_page_v4* cold_refill_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t class_idx)
     tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
     if (!tctx) return NULL;
     // Phase v4-mid-6: Get a fresh page from TinyHeap
     tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, (int)class_idx);
     if (!lease) return NULL;
+    // Clear TinyHeap's current so next call gets fresh page
+    tiny_heap_class_t* hcls = tiny_heap_class(tctx, (int)class_idx);
+    if (hcls) {
+        tiny_heap_class_unlink(hcls, lease);
+    }
     return v4_page_from_lease(lease, (int)class_idx, NULL);
 }
@@ -350,31 +363,49 @@ static small_page_v4* small_alloc_slow_v4(small_heap_ctx_v4* ctx, int class_idx)
 }

 void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
-    // Phase v4-mid-1: C6 stub - fallback to pool v1
-    if (__builtin_expect(class_idx == 6, 0)) {
-        return NULL; // C6: fallback to pool v1 (no v4 alloc yet)
-    }
+    // Phase v4-mid-5: Add stats instrumentation
+    small_heap_v4_stat_alloc_call(class_idx);
+    // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
+    if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
+        small_heap_v4_stat_alloc_fallback_pool(class_idx);
+        return NULL; // not supported outside C5/C6/C7
+    }
-    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return NULL;
-    small_class_heap_v4* h = &ctx->cls[class_idx];
-    small_page_v4* page = h->current;
-    if (!page || !page->freelist) {
-        page = small_alloc_slow_v4(ctx, class_idx);
-    }
-    if (!page || !page->freelist) {
+    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) {
+        small_heap_v4_stat_alloc_fallback_pool(class_idx);
         return NULL;
     }
+    small_class_heap_v4* h = &ctx->cls[class_idx];
+    small_page_v4* page = h->current;
+    // Try current page freelist
+    if (page && page->freelist) {
+        void* blk = page->freelist;
+        void* next = NULL;
+        memcpy(&next, blk, sizeof(void*));
+        page->freelist = next;
+        page->used++;
+        small_heap_v4_stat_alloc_success(class_idx);
+        return tiny_region_id_write_header(blk, class_idx);
+    }
+    // Current exhausted or NULL, try slow path (partial/refill)
+    page = small_alloc_slow_v4(ctx, class_idx);
+    if (!page || !page->freelist) {
+        small_heap_v4_stat_alloc_null_page(class_idx);
+        small_heap_v4_stat_alloc_fallback_pool(class_idx);
+        return NULL;
+    }
+    // Allocate from newly acquired/promoted page
     void* blk = page->freelist;
     void* next = NULL;
     memcpy(&next, blk, sizeof(void*));
     page->freelist = next;
     page->used++;
+    small_heap_v4_stat_alloc_success(class_idx);
     return tiny_region_id_write_header(blk, class_idx);
 }
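
The fast path above pops from an intrusive freelist: each free block stores the pointer to the next free block in its own first bytes, and memcpy is used to read and write that pointer without alignment or strict-aliasing concerns. A standalone illustration of the pattern (a generic sketch, not code from this repository):

#include <stdio.h>
#include <string.h>

static void* freelist_pop(void** head) {
    void* blk = *head;
    if (!blk) return NULL;
    void* next = NULL;
    memcpy(&next, blk, sizeof(void*)); /* next pointer lives inside the free block */
    *head = next;
    return blk;
}

static void freelist_push(void** head, void* blk) {
    memcpy(blk, head, sizeof(void*));  /* store the old head inside the block */
    *head = blk;
}

int main(void) {
    unsigned char slab[4][32];         /* pretend page carved into 32-byte blocks */
    void* head = NULL;
    for (int i = 0; i < 4; i++) freelist_push(&head, slab[i]);
    for (void* p; (p = freelist_pop(&head)) != NULL; )
        printf("popped block at %p\n", p);
    return 0;
}

The page->used counter incremented in the diff is the complement of the decrement in the free path; together with v4_partial_limit() it drives the current/partial/full bookkeeping.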
@@ -402,33 +433,44 @@ static void v4_unlink_from_list(small_class_heap_v4* h, v4_loc_t loc, small_page
 }

 void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
-    // Phase v4-mid-1: C6 stub - test page_meta_of() lookup, fallback to pool v1
-    if (__builtin_expect(class_idx == 6, 0)) {
-        // C6-only: Test page_meta_of() for Fail-Fast validation
-        SmallSegment* dummy_seg = (SmallSegment*)NULL; // Will be retrieved later
-        SmallPageMeta* m = smallsegment_v4_page_meta_of(dummy_seg, ptr);
-        (void)m; // Unused in v4-mid-1, but confirms function works
-        return; // Fallback to pool v1 (handled by front)
-    }
+    // Phase v4-mid-5: Add stats instrumentation
+    small_heap_v4_stat_free_call(class_idx);
+    // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
+    if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
+        return;
+    }
     if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return;
     if (!ptr) return;
+    // Phase v4-mid-6: ptr is already BASE (caller converts USER→BASE before calling us)
+    // See malloc_tiny_fast.h L254: base = ptr - 1, then L354/L282 passes base
+    void* base_ptr = ptr;
     small_class_heap_v4* h = &ctx->cls[class_idx];
     small_page_v4* prev = NULL;
     v4_loc_t loc = V4_LOC_NONE;
-    small_page_v4* page = v4_find_page(h, (const uint8_t*)ptr, &loc, &prev);
-    if (!page) return;
+    // Try to find page in current/partial/full lists (using BASE pointer)
+    small_page_v4* page = v4_find_page(h, (const uint8_t*)base_ptr, &loc, &prev);
+    // Phase v4-mid-2: If page not found in v4 heap, try page_meta_of() for segment lookup
+    if (!page) {
+        small_heap_v4_stat_free_page_not_found(class_idx);
+        // Try to find via segment mask+shift (requires segment to be initialized)
+        // For now, this is a fallback for future segment-based allocation
+        // Return without freeing (pool v1 will handle)
+        return;
+    }
+    small_heap_v4_stat_free_page_found(class_idx);
     const uint32_t partial_limit = v4_partial_limit(class_idx);
-    // freelist push
+    // freelist push (use BASE pointer, not USER pointer)
     void* head = page->freelist;
-    memcpy(ptr, &head, sizeof(void*));
-    page->freelist = ptr;
+    memcpy(base_ptr, &head, sizeof(void*));
+    page->freelist = base_ptr;
     if (page->used > 0) {
         page->used--;
     }
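
The free path works on BASE pointers: per the comments in the hunk above, the caller strips a one-byte header (base = ptr - 1) before calling in, and tiny_region_id_write_header() re-applies the header on allocation. A hedged sketch of that convention; what the header byte actually carries is an assumption beyond what the diff shows:

#include <stdint.h>

// Hypothetical helpers mirroring the USER/BASE convention described above.
static inline void* user_from_base(void* base, int class_idx) {
    uint8_t* b = (uint8_t*)base;
    b[0] = (uint8_t)class_idx;  // header byte; assumed to hold the region/class id
    return b + 1;               // USER pointer handed to the application
}

static inline void* base_from_user(void* user) {
    return (uint8_t*)user - 1;  // BASE pointer the free path links into the freelist
}

Linking through BASE means the header byte doubles as freelist storage while a block is free; it is rewritten by the next allocation.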
@@ -472,3 +514,45 @@ void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
         }
     }
 }
+
+// ============================================================================
+// Stats dump (Phase v4-mid-5)
+// ============================================================================
+void small_heap_v4_stats_dump(void) {
+    if (!small_heap_v4_stats_enabled()) {
+        return;
+    }
+    fprintf(stderr, "\n========================================\n");
+    fprintf(stderr, "[SMALL_HEAP_V4_STATS] Summary\n");
+    fprintf(stderr, "========================================\n");
+    for (int c = 0; c < 8; c++) {
+        uint64_t alloc_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_calls, memory_order_relaxed);
+        uint64_t alloc_success = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_success, memory_order_relaxed);
+        uint64_t alloc_null_page = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_null_page, memory_order_relaxed);
+        uint64_t alloc_fallback = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_fallback_pool, memory_order_relaxed);
+        uint64_t free_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].free_calls, memory_order_relaxed);
+        uint64_t free_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_found, memory_order_relaxed);
+        uint64_t free_not_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_not_found, memory_order_relaxed);
+        if (alloc_calls > 0 || free_calls > 0) {
+            fprintf(stderr, "\nClass C%d:\n", c);
+            fprintf(stderr, " Alloc: calls=%lu success=%lu null_page=%lu fallback_pool=%lu\n",
+                    (unsigned long)alloc_calls, (unsigned long)alloc_success,
+                    (unsigned long)alloc_null_page, (unsigned long)alloc_fallback);
+            fprintf(stderr, " Free: calls=%lu page_found=%lu page_not_found=%lu\n",
+                    (unsigned long)free_calls, (unsigned long)free_found,
+                    (unsigned long)free_not_found);
+        }
+    }
+    fprintf(stderr, "========================================\n\n");
+    fflush(stderr);
+}
+
+// Automatic dump at program exit
+static void small_heap_v4_stats_atexit(void) __attribute__((destructor));
+static void small_heap_v4_stats_atexit(void) {
+    small_heap_v4_stats_dump();
+}
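
For reference, the small_heap_v4_stat_* helpers called in the hot paths are plausibly relaxed atomic increments over g_small_heap_v4_stats; this is a hedged guess at one of them, not the header's actual code, and the real helpers may additionally be gated by small_heap_v4_stats_enabled() or a compile-time flag:

#include <stdatomic.h>

static inline void small_heap_v4_stat_alloc_call(int class_idx) {
    /* Relaxed ordering: counters are advisory and only read at exit. */
    atomic_fetch_add_explicit(&g_small_heap_v4_stats[class_idx].alloc_calls,
                              1, memory_order_relaxed);
}

Note that the dump is wired up with __attribute__((destructor)) rather than a literal atexit() call; both fire at normal process exit, which is why the commit message describes it as an atexit dump.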