hakmem/core/smallobject_hotbox_v4.c
Moe Charm (CI) dd974b49c5 Phase v4-mid-2, v4-mid-3, v4-mid-5: SmallObject HotBox v4 implementation and docs update
Implementation:
- SmallObject HotBox v4 (core/smallobject_hotbox_v4.c) now fully implements C6-only allocations and frees, including current/partial management and freelist operations.
- Cold Iface (tiny_heap-based) for page refill/retire is integrated.
- Stats instrumentation (v4-mid-5) added to small_heap_alloc_fast_v4 and small_heap_free_fast_v4, with a new header file core/box/smallobject_hotbox_v4_stats_box.h and atexit dump function.

Updates:
- CURRENT_TASK.md has been condensed and updated with summaries of Phase v4-mid-2 (C6-only v4), Phase v4-mid-3 (C5-only v4 pilot), and the stats implementation (v4-mid-5).
- docs/analysis/SMALLOBJECT_V4_BOX_DESIGN.md updated with A/B results and conclusions for C6-only and C5-only v4 implementations.
- The previous CURRENT_TASK.md content has been archived to CURRENT_TASK_ARCHIVE_20251210.md.
2025-12-11 01:01:15 +09:00

559 lines · 20 KiB · C

// smallobject_hotbox_v4.c - SmallObject HotHeap v4 (C5/C6/C7 opt-in)
//
// Phase v4-3.1: C7 is fully self-contained on v4's own freelist/current/partial. C6/C5 use the same-shaped path behind strong gates.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>      // fprintf/fflush in small_heap_v4_stats_dump
#include <stdatomic.h>  // atomic_load_explicit in small_heap_v4_stats_dump
#include "box/smallobject_hotbox_v4_box.h"
#include "box/smallobject_hotbox_v4_env_box.h"
#include "box/smallobject_hotbox_v4_stats_box.h"
#include "box/smallobject_cold_iface_v4.h"
#include "box/smallobject_hotbox_v3_env_box.h"
#include "box/tiny_heap_box.h"
#include "box/smallsegment_v4_box.h"
#include "box/smallsegment_v4_env_box.h"
#include "box/tiny_cold_iface_v1.h"
#include "box/tiny_geometry_box.h"
#include "tiny_region_id.h"
// ============================================================================
// Stats storage (Phase v4-mid-5)
// ============================================================================
small_heap_v4_class_stats_t g_small_heap_v4_stats[8];
// ============================================================================
// v4 Segment Configuration (Phase v4-mid-0+)
// ============================================================================
#define SMALL_SEGMENT_V4_SIZE (2 * 1024 * 1024) // 2 MiB segment
#define SMALL_SEGMENT_V4_PAGE_SIZE (64 * 1024) // 64 KiB page
#define SMALL_SEGMENT_V4_MAGIC 0xDEADBEEF
#define SMALL_SEGMENT_V4_PAGE_SHIFT 16 // log2(64KiB)
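// Illustrative geometry (derived from the constants above, not used by the
// code): a 2 MiB segment holds 2 MiB / 64 KiB = 32 pages, and for any block
// address inside a segment:
//   seg_base = addr & ~(SMALL_SEGMENT_V4_SIZE - 1)   // mask the low 21 bits
//   page_idx = (addr - seg_base) >> SMALL_SEGMENT_V4_PAGE_SHIFT
// e.g. an offset of 0x2A340 into the segment maps to page 0x2A340 >> 16 = 2.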
// TLS context
static __thread small_heap_ctx_v4 g_ctx_v4;
// Internal segment structure (internal use only, not exposed via public box API)
typedef struct small_segment_v4_internal {
    int class_idx;
    size_t segment_size;
    tiny_heap_ctx_t* tiny_ctx;
} small_segment_v4_internal;
static __thread small_segment_v4_internal g_segments_v4[SMALLOBJECT_NUM_CLASSES];

small_heap_ctx_v4* small_heap_ctx_v4_get(void) {
    return &g_ctx_v4;
}
static small_page_v4* v4_page_from_lease(tiny_heap_page_t* lease, int class_idx, small_segment_v4* seg);
// -----------------------------------------------------------------------------
// helpers
// -----------------------------------------------------------------------------
static inline int v4_class_supported(int class_idx) {
    return class_idx == 7 || class_idx == 6 || class_idx == 5;
}

static size_t smallsegment_v4_default_size(void) {
    const char* env = smallsegment_v4_size_env();
    if (env && *env) {
        size_t v = strtoull(env, NULL, 0);
        if (v > (size_t)(64 * 1024)) {
            return v;
        }
    }
    return (size_t)(2 * 1024 * 1024); // default: 2 MiB segment unit (for the future segment implementation)
}
small_segment_v4* smallsegment_v4_acquire(int class_idx) {
    if (!v4_class_supported(class_idx)) return NULL;
    small_segment_v4_internal* seg = &g_segments_v4[class_idx];
    seg->class_idx = class_idx;
    if (!seg->segment_size) {
        seg->segment_size = smallsegment_v4_default_size();
    }
    if (!seg->tiny_ctx) {
        seg->tiny_ctx = tiny_heap_ctx_for_thread();
    }
    return (small_segment_v4*)seg;
}
void* smallsegment_v4_alloc_page(small_segment_v4* seg, int class_idx) {
    if (!seg || !v4_class_supported(class_idx)) return NULL;
    // Internal use only: cast to internal type to access tiny_ctx
    small_segment_v4_internal* int_seg = (small_segment_v4_internal*)seg;
    if (!int_seg->tiny_ctx) {
        int_seg->tiny_ctx = tiny_heap_ctx_for_thread();
    }
    tiny_heap_ctx_t* tctx = int_seg->tiny_ctx ? int_seg->tiny_ctx : tiny_heap_ctx_for_thread();
    if (!tctx) return NULL;
    tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, class_idx);
    if (!lease) return NULL;
    int_seg->tiny_ctx = tctx;
    return v4_page_from_lease(lease, class_idx, seg);
}
void smallsegment_v4_release_if_empty(small_segment_v4* seg, void* page_ptr, int class_idx) {
    (void)seg; // segment bookkeeping not yet needed here
    small_page_v4* page = (small_page_v4*)page_ptr;
    if (!page || !v4_class_supported(class_idx)) return;
    tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* lease = (tiny_heap_page_t*)page->slab_ref;
    if (tctx && lease) {
        tiny_heap_page_becomes_empty(tctx, class_idx, lease);
    }
    free(page);
}
static inline void v4_page_push_partial(small_class_heap_v4* h, small_page_v4* page) {
    if (!h || !page) return;
    page->next = h->partial_head;
    h->partial_head = page;
    h->partial_count++;
}

static inline small_page_v4* v4_page_pop_partial(small_class_heap_v4* h) {
    if (!h) return NULL;
    small_page_v4* p = h->partial_head;
    if (p) {
        h->partial_head = p->next;
        p->next = NULL;
        if (h->partial_count > 0) {
            h->partial_count--;
        }
    }
    return p;
}

static inline void v4_page_push_full(small_class_heap_v4* h, small_page_v4* page) {
    if (!h || !page) return;
    page->next = h->full_head;
    h->full_head = page;
}
static inline uint32_t v4_partial_limit(int class_idx) {
    // Hold C7 a little wider to suppress refill/retire churn
    return (class_idx == 7) ? 2u : 1u;
}
static inline int v4_ptr_in_page(const small_page_v4* page, const uint8_t* ptr) {
    if (!page || !ptr) return 0;
    uint8_t* base = page->base;
    size_t span = (size_t)page->block_size * (size_t)page->capacity;
    if (ptr < base || ptr >= base + span) return 0;
    size_t off = (size_t)(ptr - base);
    return (off % page->block_size) == 0;
}

static inline void* v4_build_freelist(uint8_t* base, uint16_t capacity, size_t stride) {
    void* head = NULL;
    for (int i = capacity - 1; i >= 0; i--) {
        uint8_t* blk = base + ((size_t)i * stride);
        void* next = head;
        head = blk;
        memcpy(blk, &next, sizeof(void*));
    }
    return head;
}
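
// Layout note (illustrative): v4_build_freelist() threads an intrusive singly
// linked list through the blocks themselves, storing each block's next pointer
// in its first sizeof(void*) bytes:
//   head -> base+0 -> base+stride -> ... -> base+(capacity-1)*stride -> NULL
// Popping a block is then just a memcpy of the stored next pointer, which is
// exactly what small_heap_alloc_fast_v4() does below.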
typedef enum {
    V4_LOC_NONE = 0,
    V4_LOC_CURRENT,
    V4_LOC_PARTIAL,
    V4_LOC_FULL,
} v4_loc_t;
static small_page_v4* v4_find_page(small_class_heap_v4* h, const uint8_t* ptr, v4_loc_t* loc, small_page_v4** prev_out) {
    if (loc) *loc = V4_LOC_NONE;
    if (prev_out) *prev_out = NULL;
    if (!h || !ptr) return NULL;
    if (h->current && v4_ptr_in_page(h->current, ptr)) {
        if (loc) *loc = V4_LOC_CURRENT;
        return h->current;
    }
    small_page_v4* prev = NULL;
    for (small_page_v4* p = h->partial_head; p; prev = p, p = p->next) {
        if (v4_ptr_in_page(p, ptr)) {
            if (loc) *loc = V4_LOC_PARTIAL;
            if (prev_out) *prev_out = prev;
            return p;
        }
    }
    prev = NULL; // reset: prev must track the full list, not the tail of partial
    for (small_page_v4* p = h->full_head; p; prev = p, p = p->next) {
        if (v4_ptr_in_page(p, ptr)) {
            if (loc) *loc = V4_LOC_FULL;
            if (prev_out) *prev_out = prev;
            return p;
        }
    }
    return NULL;
}
int smallobject_hotbox_v4_can_own(int class_idx, void* ptr) {
    if (__builtin_expect(!v4_class_supported(class_idx), 0)) return 0;
    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return 0;
    if (!ptr) return 0;
    small_heap_ctx_v4* ctx = small_heap_ctx_v4_get();
    if (!ctx) return 0;
    small_class_heap_v4* h = &ctx->cls[class_idx];
    return v4_find_page(h, (const uint8_t*)ptr, NULL, NULL) != NULL;
}
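
// Cost note: can_own() walks current + partial + full, so it is O(pages held
// by this class on this thread). The partial list is bounded by
// v4_partial_limit(); the full list can grow with the number of live pages.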
// -----------------------------------------------------------------------------
// Cold iface (C5/C6/C7, via Tiny v1)
// -----------------------------------------------------------------------------
static small_page_v4* v4_page_from_lease(tiny_heap_page_t* lease, int class_idx, small_segment_v4* seg) {
    if (!lease) return NULL;
    small_page_v4* page = (small_page_v4*)malloc(sizeof(small_page_v4));
    if (!page) return NULL;
    memset(page, 0, sizeof(*page));
    page->class_idx = (uint8_t)class_idx;
    page->capacity = lease->capacity;
    page->used = 0;
    page->block_size = (uint32_t)tiny_stride_for_class((int)class_idx);
    page->base = lease->base;
    page->slab_ref = lease;
    page->segment = seg;
    page->freelist = v4_build_freelist(lease->base, lease->capacity, page->block_size);
    if (!page->freelist) {
        free(page);
        return NULL;
    }
    page->next = NULL;
    page->flags = 0;
    return page;
}
static small_page_v4* cold_refill_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t class_idx) {
    if (__builtin_expect(!v4_class_supported((int)class_idx), 0)) return NULL;
    (void)hot_ctx;
    if (smallsegment_v4_enabled()) {
        small_segment_v4* seg = smallsegment_v4_acquire((int)class_idx);
        return (small_page_v4*)smallsegment_v4_alloc_page(seg, (int)class_idx);
    }
    tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
    if (!tctx) return NULL;
    // Phase v4-mid-6: Get a fresh page from TinyHeap
    tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, (int)class_idx);
    if (!lease) return NULL;
    // Clear TinyHeap's current so the next call gets a fresh page
    tiny_heap_class_t* hcls = tiny_heap_class(tctx, (int)class_idx);
    if (hcls) {
        tiny_heap_class_unlink(hcls, lease);
    }
    return v4_page_from_lease(lease, (int)class_idx, NULL);
}
static void cold_retire_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t class_idx, small_page_v4* page) {
    (void)hot_ctx;
    if (!page) return;
    if (smallsegment_v4_enabled()) {
        small_segment_v4* seg = (small_segment_v4*)page->segment;
        smallsegment_v4_release_if_empty(seg, page, (int)class_idx);
        return;
    }
    tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* lease = (tiny_heap_page_t*)page->slab_ref;
    if (tctx && lease) {
        tiny_heap_page_becomes_empty(tctx, (int)class_idx, lease);
    }
    free(page);
}
// Direct function implementations (Phase v4-mid-0: call cold_refill/retire directly)
small_page_v4* small_cold_v4_refill_page(small_heap_ctx_v4* ctx, uint32_t class_idx) {
    return cold_refill_page_v4(ctx, class_idx);
}

void small_cold_v4_retire_page(small_heap_ctx_v4* ctx, small_page_v4* page) {
    if (!page) return;
    cold_retire_page_v4(ctx, (uint32_t)page->class_idx, page);
}

bool small_cold_v4_remote_push(small_page_v4* page, void* ptr, uint32_t tid) {
    (void)page; (void)ptr; (void)tid;
    return false; // stub: not yet implemented
}

void small_cold_v4_remote_drain(small_heap_ctx_v4* ctx) {
    (void)ctx;
    // stub: not yet implemented
}
// ============================================================================
// smallsegment_v4_page_meta_of: Pointer → Page metadata lookup
// ============================================================================
// Phase v4-mid-1: Implement mask+shift O(1) lookup for Fail-Fast validation.
//
// Algorithm:
// 1. Compute segment base: addr & ~(SMALL_SEGMENT_V4_SIZE - 1)
// 2. Verify magic number
// 3. Compute page_idx: (addr - seg_base) >> SMALL_SEGMENT_V4_PAGE_SHIFT
// 4. Return &seg->page_meta[page_idx] or NULL
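//
// Worked example (illustrative; assumes a segment mapped at 0x7f0000000000):
//   ptr      = 0x7f0000012345
//   seg_base = ptr & ~(2 MiB - 1)       = 0x7f0000000000
//   page_idx = (ptr - seg_base) >> 16   = 0x12345 >> 16 = 1
// so the pointer would resolve to page_meta[1] once the metadata array lands.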
small_page_v4* smallsegment_v4_page_meta_of(small_segment_v4* seg, void* ptr) {
    if (!seg || !ptr) {
        return NULL;
    }
    uintptr_t addr = (uintptr_t)ptr;
    uintptr_t seg_base = addr & ~((uintptr_t)SMALL_SEGMENT_V4_SIZE - 1);
    // Verify segment pointer and magic
    SmallSegment* s = (SmallSegment*)seg_base;
    if (!s || s->magic != SMALL_SEGMENT_V4_MAGIC) {
        return NULL;
    }
    // Compute page index and bounds check
    size_t page_idx = (addr - seg_base) >> SMALL_SEGMENT_V4_PAGE_SHIFT;
    if (page_idx >= s->num_pages) {
        return NULL;
    }
    // Return page metadata (computed as flexible array offset)
    // Note: For now, just return a non-NULL marker.
    // The actual page_meta[] array will be implemented in Phase v4-mid-2.
    return (small_page_v4*)(uintptr_t)1; // Non-NULL sentinel, matching the declared return type
}
// -----------------------------------------------------------------------------
// alloc/free
// -----------------------------------------------------------------------------
static small_page_v4* small_alloc_slow_v4(small_heap_ctx_v4* ctx, int class_idx) {
    small_class_heap_v4* h = &ctx->cls[class_idx];
    const uint32_t partial_limit = v4_partial_limit(class_idx);
    small_page_v4* cur = h->current;
    if (cur && cur->freelist) {
        return cur; // usable current
    }
    if (cur && !cur->freelist) {
        // Park current on partial/full for now (prefer partial)
        if (h->partial_count < partial_limit) {
            v4_page_push_partial(h, cur);
        } else {
            v4_page_push_full(h, cur);
        }
        h->current = NULL;
    }
    // Bring back exactly one page from partial
    small_page_v4* from_partial = v4_page_pop_partial(h);
    if (from_partial) {
        h->current = from_partial;
        return from_partial;
    }
    // Call the direct Cold function (not a vtable)
    small_page_v4* page = small_cold_v4_refill_page(ctx, (uint32_t)class_idx);
    if (!page) return NULL;
    h->current = page;
    return page;
}
void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
    // Phase v4-mid-5: stats instrumentation
    small_heap_v4_stat_alloc_call(class_idx);
    // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
    if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
        small_heap_v4_stat_alloc_fallback_pool(class_idx);
        return NULL; // classes other than C5/C6/C7 are unsupported
    }
    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) {
        small_heap_v4_stat_alloc_fallback_pool(class_idx);
        return NULL;
    }
    small_class_heap_v4* h = &ctx->cls[class_idx];
    small_page_v4* page = h->current;
    // Try the current page's freelist
    if (page && page->freelist) {
        void* blk = page->freelist;
        void* next = NULL;
        memcpy(&next, blk, sizeof(void*));
        page->freelist = next;
        page->used++;
        small_heap_v4_stat_alloc_success(class_idx);
        return tiny_region_id_write_header(blk, class_idx);
    }
    // Current exhausted or NULL; take the slow path (partial/refill)
    page = small_alloc_slow_v4(ctx, class_idx);
    if (!page || !page->freelist) {
        small_heap_v4_stat_alloc_null_page(class_idx);
        small_heap_v4_stat_alloc_fallback_pool(class_idx);
        return NULL;
    }
    // Allocate from the newly acquired/promoted page
    void* blk = page->freelist;
    void* next = NULL;
    memcpy(&next, blk, sizeof(void*));
    page->freelist = next;
    page->used++;
    small_heap_v4_stat_alloc_success(class_idx);
    return tiny_region_id_write_header(blk, class_idx);
}
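
// Usage sketch (illustrative only; the real call sites live in the tiny
// fast-path wrappers referenced in small_heap_free_fast_v4 below). Assuming
// the 1-byte region-id header implied by the "base = ptr - 1" note there:
//
//   small_heap_ctx_v4* ctx = small_heap_ctx_v4_get();
//   void* user = small_heap_alloc_fast_v4(ctx, 6);    // returns USER pointer
//   if (user) {
//       void* base = (uint8_t*)user - 1;              // USER -> BASE
//       small_heap_free_fast_v4(ctx, 6, base);        // free expects BASE
//   }
//
// Note the asymmetry: alloc returns the USER pointer via
// tiny_region_id_write_header(), while free expects the caller to have
// already converted USER back to BASE.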
static void v4_unlink_from_list(small_class_heap_v4* h, v4_loc_t loc, small_page_v4* prev, small_page_v4* page) {
    if (!h || !page) return;
    switch (loc) {
    case V4_LOC_CURRENT:
        h->current = NULL;
        break;
    case V4_LOC_PARTIAL:
        if (prev) prev->next = page->next;
        else h->partial_head = page->next;
        if (h->partial_count > 0) {
            h->partial_count--;
        }
        break;
    case V4_LOC_FULL:
        if (prev) prev->next = page->next;
        else h->full_head = page->next;
        break;
    default:
        break;
    }
    page->next = NULL;
}
void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
    // Phase v4-mid-5: stats instrumentation
    small_heap_v4_stat_free_call(class_idx);
    // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
    if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
        return;
    }
    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return;
    if (!ptr) return;
    // Phase v4-mid-6: ptr is already BASE (caller converts USER -> BASE before calling us)
    // See malloc_tiny_fast.h L254: base = ptr - 1, then L354/L282 passes base
    void* base_ptr = ptr;
    small_class_heap_v4* h = &ctx->cls[class_idx];
    small_page_v4* prev = NULL;
    v4_loc_t loc = V4_LOC_NONE;
    // Try to find the page in the current/partial/full lists (using the BASE pointer)
    small_page_v4* page = v4_find_page(h, (const uint8_t*)base_ptr, &loc, &prev);
    // Phase v4-mid-2: If the page is not found in the v4 heap, try page_meta_of() for a segment lookup
    if (!page) {
        small_heap_v4_stat_free_page_not_found(class_idx);
        // A segment mask+shift lookup requires the segment to be initialized;
        // for now this branch is a fallback slot for future segment-based allocation.
        // Return without freeing (pool v1 will handle it).
        return;
    }
    small_heap_v4_stat_free_page_found(class_idx);
    const uint32_t partial_limit = v4_partial_limit(class_idx);
    // freelist push (use the BASE pointer, not the USER pointer)
    void* head = page->freelist;
    memcpy(base_ptr, &head, sizeof(void*));
    page->freelist = base_ptr;
    if (page->used > 0) {
        page->used--;
    }
    if (page->used == 0) {
        if (loc != V4_LOC_CURRENT) {
            v4_unlink_from_list(h, loc, prev, page);
        }
        if (!h->current) {
            h->current = page;
            page->next = NULL;
            return;
        }
        if (h->current == page) {
            page->next = NULL;
            return;
        }
        if (h->partial_count < partial_limit) {
            v4_page_push_partial(h, page);
            return;
        }
        // Call the direct Cold function (not a vtable)
        small_cold_v4_retire_page(ctx, page);
        return;
    }
    if (!h->current) {
        // Install this page as current
        if (loc != V4_LOC_CURRENT) {
            v4_unlink_from_list(h, loc, prev, page);
        }
        h->current = page;
        page->next = NULL;
    } else if (loc == V4_LOC_FULL && page->freelist) {
        // Move full -> partial (restore up to the partial limit even while a current page exists)
        v4_unlink_from_list(h, loc, prev, page);
        if (h->partial_count < partial_limit) {
            v4_page_push_partial(h, page);
        } else {
            v4_page_push_full(h, page); // over the limit: put it back on full
        }
    }
}
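
// Free-path page transitions (summary of the logic above):
//   used hits 0        : the page is unlinked and becomes current if that slot
//                        is empty, else goes to partial while under
//                        v4_partial_limit(), else is retired via
//                        small_cold_v4_retire_page().
//   on full, freelist  : the page moves full -> partial while under the
//                        partial limit, otherwise it stays on full.
//   no current page    : the page just freed into is promoted to current.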
// ============================================================================
// Stats dump (Phase v4-mid-5)
// ============================================================================
void small_heap_v4_stats_dump(void) {
    if (!small_heap_v4_stats_enabled()) {
        return;
    }
    fprintf(stderr, "\n========================================\n");
    fprintf(stderr, "[SMALL_HEAP_V4_STATS] Summary\n");
    fprintf(stderr, "========================================\n");
    for (int c = 0; c < 8; c++) {
        uint64_t alloc_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_calls, memory_order_relaxed);
        uint64_t alloc_success = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_success, memory_order_relaxed);
        uint64_t alloc_null_page = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_null_page, memory_order_relaxed);
        uint64_t alloc_fallback = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_fallback_pool, memory_order_relaxed);
        uint64_t free_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].free_calls, memory_order_relaxed);
        uint64_t free_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_found, memory_order_relaxed);
        uint64_t free_not_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_not_found, memory_order_relaxed);
        if (alloc_calls > 0 || free_calls > 0) {
            fprintf(stderr, "\nClass C%d:\n", c);
            fprintf(stderr, " Alloc: calls=%lu success=%lu null_page=%lu fallback_pool=%lu\n",
                    (unsigned long)alloc_calls, (unsigned long)alloc_success,
                    (unsigned long)alloc_null_page, (unsigned long)alloc_fallback);
            fprintf(stderr, " Free: calls=%lu page_found=%lu page_not_found=%lu\n",
                    (unsigned long)free_calls, (unsigned long)free_found,
                    (unsigned long)free_not_found);
        }
    }
    fprintf(stderr, "========================================\n\n");
    fflush(stderr);
}
// Automatic dump at program exit (registered via the destructor attribute)
static void small_heap_v4_stats_atexit(void) __attribute__((destructor));
static void small_heap_v4_stats_atexit(void) {
    small_heap_v4_stats_dump();
}
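
// Usage note: the dump is gated by small_heap_v4_stats_enabled() (see
// box/smallobject_hotbox_v4_env_box.h), so it is a no-op unless stats are
// enabled; small_heap_v4_stats_dump() can also be called manually. Expected
// output shape, derived from the format strings above (numbers illustrative):
//
//   Class C6:
//    Alloc: calls=1024 success=1000 null_page=0 fallback_pool=24
//    Free: calls=998 page_found=990 page_not_found=8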