// smallobject_hotbox_v4.c - SmallObject HotHeap v4 (C5/C6/C7 opt-in)
//
// Phase v4-3.1: C7 is fully served by v4's own freelist/current/partial lists.
// C5/C6 take the same-shaped path behind a strong feature gate.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdatomic.h>

#ifndef likely
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

#include "box/smallobject_hotbox_v4_box.h"
#include "box/smallobject_hotbox_v4_env_box.h"
#include "box/smallobject_hotbox_v4_stats_box.h"
#include "box/smallobject_cold_iface_v4.h"
#include "box/smallobject_hotbox_v3_env_box.h"
#include "box/tiny_heap_box.h"
#include "box/smallsegment_v4_box.h"
#include "box/smallsegment_v4_env_box.h"
#include "box/tiny_cold_iface_v1.h"
#include "box/tiny_geometry_box.h"
#include "tiny_region_id.h"

// ============================================================================
// Stats storage (Phase v4-mid-5)
// ============================================================================
small_heap_v4_class_stats_t g_small_heap_v4_stats[8];

// ============================================================================
// v4 Segment Configuration (Phase v4-mid-0+)
// ============================================================================
#define SMALL_SEGMENT_V4_SIZE       (2 * 1024 * 1024) // 2 MiB segment
#define SMALL_SEGMENT_V4_PAGE_SIZE  (64 * 1024)       // 64 KiB page
#define SMALL_SEGMENT_V4_MAGIC      0xDEADBEEF
#define SMALL_SEGMENT_V4_PAGE_SHIFT 16                // log2(64 KiB)
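
// Illustrative compile-time check (not in the original source): the O(1)
// mask+shift lookup below assumes the segment size is a power of two and that
// SMALL_SEGMENT_V4_PAGE_SHIFT is exactly log2 of the page size. A minimal
// sketch of those invariants as C11 static assertions:
_Static_assert((SMALL_SEGMENT_V4_SIZE & (SMALL_SEGMENT_V4_SIZE - 1)) == 0,
               "segment size must be a power of two for mask+shift lookup");
_Static_assert((1u << SMALL_SEGMENT_V4_PAGE_SHIFT) == SMALL_SEGMENT_V4_PAGE_SIZE,
               "page shift must match the 64 KiB page size");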

// TLS context
static __thread small_heap_ctx_v4 g_ctx_v4;

// Phase v4-mid-6: C6 TLS Fastlist
static __thread SmallC6FastState g_small_c6_fast;

static inline SmallC6FastState* small_c6_fast_state(void) { return &g_small_c6_fast; }

// Internal segment structure (internal use only, not exposed via public box API)
typedef struct small_segment_v4_internal {
    int class_idx;
    size_t segment_size;
    tiny_heap_ctx_t* tiny_ctx;
} small_segment_v4_internal;

static __thread small_segment_v4_internal g_segments_v4[SMALLOBJECT_NUM_CLASSES];

small_heap_ctx_v4* small_heap_ctx_v4_get(void) { return &g_ctx_v4; }

static small_page_v4* v4_page_from_lease(tiny_heap_page_t* lease, int class_idx,
                                         small_segment_v4* seg);

// -----------------------------------------------------------------------------
// helpers
// -----------------------------------------------------------------------------
static inline int v4_class_supported(int class_idx) {
    return class_idx == 7 || class_idx == 6 || class_idx == 5;
}

static size_t smallsegment_v4_default_size(void) {
    const char* env = smallsegment_v4_size_env();
    if (env && *env) {
        size_t v = strtoull(env, NULL, 0);
        if (v > (size_t)(64 * 1024)) {
            return v;
        }
    }
    return (size_t)(2 * 1024 * 1024); // default 2 MiB segment unit (reserved for a future implementation)
}

small_segment_v4* smallsegment_v4_acquire(int class_idx) {
    if (!v4_class_supported(class_idx)) return NULL;
    small_segment_v4_internal* seg = &g_segments_v4[class_idx];
    seg->class_idx = class_idx;
    if (!seg->segment_size) {
        seg->segment_size = smallsegment_v4_default_size();
    }
    if (!seg->tiny_ctx) {
        seg->tiny_ctx = tiny_heap_ctx_for_thread();
    }
    return (small_segment_v4*)seg;
}

void* smallsegment_v4_alloc_page(small_segment_v4* seg, int class_idx) {
    if (!seg || !v4_class_supported(class_idx)) return NULL;
    // Internal use only: cast to the internal type to access tiny_ctx
    small_segment_v4_internal* int_seg = (small_segment_v4_internal*)seg;
    if (!int_seg->tiny_ctx) {
        int_seg->tiny_ctx = tiny_heap_ctx_for_thread();
    }
    tiny_heap_ctx_t* tctx = int_seg->tiny_ctx ? int_seg->tiny_ctx : tiny_heap_ctx_for_thread();
    if (!tctx) return NULL;
    tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, class_idx);
    if (!lease) return NULL;
    int_seg->tiny_ctx = tctx;
    return v4_page_from_lease(lease, class_idx, seg);
}

void smallsegment_v4_release_if_empty(small_segment_v4* seg, void* page_ptr, int class_idx) {
    small_page_v4* page = (small_page_v4*)page_ptr;
    if (!page || !v4_class_supported(class_idx)) return;
    tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* lease = (tiny_heap_page_t*)page->slab_ref;
    if (tctx && lease) {
        tiny_heap_page_becomes_empty(tctx, class_idx, lease);
    }
    free(page);
}

static inline void v4_page_push_partial(small_class_heap_v4* h, small_page_v4* page) {
    if (!h || !page) return;
    page->next = h->partial_head;
    h->partial_head = page;
    h->partial_count++;
}

static inline small_page_v4* v4_page_pop_partial(small_class_heap_v4* h) {
    if (!h) return NULL;
    small_page_v4* p = h->partial_head;
    if (p) {
        h->partial_head = p->next;
        p->next = NULL;
        if (h->partial_count > 0) {
            h->partial_count--;
        }
    }
    return p;
}

static inline void v4_page_push_full(small_class_heap_v4* h, small_page_v4* page) {
    if (!h || !page) return;
    page->next = h->full_head;
    h->full_head = page;
}

static inline uint32_t v4_partial_limit(int class_idx) {
    // C7 keeps a slightly wider margin to reduce refill/retire churn
    return (class_idx == 7) ? 2u : 1u;
}

static inline int v4_ptr_in_page(const small_page_v4* page, const uint8_t* ptr) {
    if (!page || !ptr) return 0;
    uint8_t* base = page->base;
    size_t span = (size_t)page->block_size * (size_t)page->capacity;
    if (ptr < base || ptr >= base + span) return 0;
    size_t off = (size_t)(ptr - base);
    return (off % page->block_size) == 0;
}

static inline void* v4_build_freelist(uint8_t* base, uint16_t capacity, size_t stride) {
    void* head = NULL;
    for (int i = capacity - 1; i >= 0; i--) {
        uint8_t* blk = base + ((size_t)i * stride);
        void* next = head;
        head = blk;
        memcpy(blk, &next, sizeof(void*));
    }
    return head;
}
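
// Illustrative sketch (not part of the original source): how the intrusive
// freelist built above is laid out. Each free block stores the address of the
// next free block in its first sizeof(void*) bytes; building back to front
// means pops return blocks in ascending address order. The buffer, stride,
// and the demo guard macro here are arbitrary choices.
#ifdef SMALLOBJECT_V4_FREELIST_DEMO
static void v4_freelist_demo(void) {
    _Alignas(void*) uint8_t buf[4 * 64];     // 4 blocks at a 64-byte stride
    void* p = v4_build_freelist(buf, 4, 64); // head is buf + 0
    while (p) {                              // visits buf+0, +64, +128, +192
        void* next;
        memcpy(&next, p, sizeof(void*));     // read the embedded link
        p = next;                            // NULL terminates the walk
    }
}
#endif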

typedef enum {
    V4_LOC_NONE = 0,
    V4_LOC_CURRENT,
    V4_LOC_PARTIAL,
    V4_LOC_FULL,
} v4_loc_t;

static small_page_v4* v4_find_page(small_class_heap_v4* h, const uint8_t* ptr,
                                   v4_loc_t* loc, small_page_v4** prev_out) {
    if (loc) *loc = V4_LOC_NONE;
    if (prev_out) *prev_out = NULL;
    if (!h || !ptr) return NULL;
    if (h->current && v4_ptr_in_page(h->current, ptr)) {
        if (loc) *loc = V4_LOC_CURRENT;
        return h->current;
    }
    small_page_v4* prev = NULL;
    for (small_page_v4* p = h->partial_head; p; prev = p, p = p->next) {
        if (v4_ptr_in_page(p, ptr)) {
            if (loc) *loc = V4_LOC_PARTIAL;
            if (prev_out) *prev_out = prev;
            return p;
        }
    }
    prev = NULL; // reset: prev must not leak from the partial list into the full list
    for (small_page_v4* p = h->full_head; p; prev = p, p = p->next) {
        if (v4_ptr_in_page(p, ptr)) {
            if (loc) *loc = V4_LOC_FULL;
            if (prev_out) *prev_out = prev;
            return p;
        }
    }
    return NULL;
}

int smallobject_hotbox_v4_can_own(int class_idx, void* ptr) {
    if (__builtin_expect(!v4_class_supported(class_idx), 0)) return 0;
    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return 0;
    if (!ptr) return 0;
    small_heap_ctx_v4* ctx = small_heap_ctx_v4_get();
    if (!ctx) return 0;
    small_class_heap_v4* h = &ctx->cls[class_idx];
    return v4_find_page(h, (const uint8_t*)ptr, NULL, NULL) != NULL;
}

// -----------------------------------------------------------------------------
// Cold iface (C5/C6/C7, via Tiny v1)
// -----------------------------------------------------------------------------
static small_page_v4* v4_page_from_lease(tiny_heap_page_t* lease, int class_idx,
                                         small_segment_v4* seg) {
    if (!lease) return NULL;
    small_page_v4* page = (small_page_v4*)malloc(sizeof(small_page_v4));
    if (!page) return NULL;
    memset(page, 0, sizeof(*page));
    page->class_idx = (uint8_t)class_idx;
    page->capacity = lease->capacity;
    page->used = 0;
    page->block_size = (uint32_t)tiny_stride_for_class((int)class_idx);
    page->base = lease->base;
    page->slab_ref = lease;
    page->segment = seg;
    page->freelist = v4_build_freelist(lease->base, lease->capacity, page->block_size);
    if (!page->freelist) {
        free(page);
        return NULL;
    }
    page->next = NULL;
    page->flags = 0;
    return page;
}

static small_page_v4* cold_refill_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t class_idx) {
    if (__builtin_expect(!v4_class_supported((int)class_idx), 0)) return NULL;
    (void)hot_ctx;
    if (smallsegment_v4_enabled()) {
        small_segment_v4* seg = smallsegment_v4_acquire((int)class_idx);
        return (small_page_v4*)smallsegment_v4_alloc_page(seg, (int)class_idx);
    }
    tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
    if (!tctx) return NULL;
    // Phase v4-mid-6: get a fresh page from TinyHeap
    tiny_heap_page_t* lease = tiny_heap_prepare_page(tctx, (int)class_idx);
    if (!lease) return NULL;
    // Clear TinyHeap's current so the next call gets a fresh page
    tiny_heap_class_t* hcls = tiny_heap_class(tctx, (int)class_idx);
    if (hcls) {
        tiny_heap_class_unlink(hcls, lease);
    }
    return v4_page_from_lease(lease, (int)class_idx, NULL);
}

static void cold_retire_page_v4(small_heap_ctx_v4* hot_ctx, uint32_t class_idx, small_page_v4* page) {
    (void)hot_ctx;
    if (!page) return;
    if (smallsegment_v4_enabled()) {
        small_segment_v4* seg = (small_segment_v4*)page->segment;
        smallsegment_v4_release_if_empty(seg, page, (int)class_idx);
        return;
    }
    tiny_heap_ctx_t* tctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* lease = (tiny_heap_page_t*)page->slab_ref;
    if (tctx && lease) {
        tiny_heap_page_becomes_empty(tctx, (int)class_idx, lease);
    }
    free(page);
}

// Direct function implementations (Phase v4-mid-0: call cold_refill/retire directly)
small_page_v4* small_cold_v4_refill_page(small_heap_ctx_v4* ctx, uint32_t class_idx) {
    return cold_refill_page_v4(ctx, class_idx);
}

void small_cold_v4_retire_page(small_heap_ctx_v4* ctx, small_page_v4* page) {
    if (!page) return;
    cold_retire_page_v4(ctx, (uint32_t)page->class_idx, page);
}

bool small_cold_v4_remote_push(small_page_v4* page, void* ptr, uint32_t tid) {
    (void)page; (void)ptr; (void)tid;
    return false; // stub: not yet implemented
}

void small_cold_v4_remote_drain(small_heap_ctx_v4* ctx) {
    (void)ctx; // stub: not yet implemented
}

// ============================================================================
// smallsegment_v4_page_meta_of: Pointer → Page metadata lookup
// ============================================================================
// Phase v4-mid-1: Implement mask+shift O(1) lookup for Fail-Fast validation.
//
// Algorithm:
//   1. Compute segment base: addr & ~(SMALL_SEGMENT_V4_SIZE - 1)
//   2. Verify magic number
//   3. Compute page_idx: (addr - seg_base) >> SMALL_SEGMENT_V4_PAGE_SHIFT
//   4. Return &seg->page_meta[page_idx] or NULL
small_page_v4* smallsegment_v4_page_meta_of(small_segment_v4* seg, void* ptr) {
    if (!seg || !ptr) {
        return NULL;
    }
    uintptr_t addr = (uintptr_t)ptr;
    uintptr_t seg_base = addr & ~(uintptr_t)(SMALL_SEGMENT_V4_SIZE - 1);
    // Verify segment pointer and magic
    SmallSegment* s = (SmallSegment*)seg_base;
    if (!s || s->magic != SMALL_SEGMENT_V4_MAGIC) {
        return NULL;
    }
    // Compute page index and bounds check
    size_t page_idx = (addr - seg_base) >> SMALL_SEGMENT_V4_PAGE_SHIFT;
    if (page_idx >= s->num_pages) {
        return NULL;
    }
    // Return page metadata (computed as a flexible-array offset).
    // Note: for now this is just a non-NULL marker; the actual page_meta[]
    // array will be implemented in Phase v4-mid-2.
    return (small_page_v4*)(uintptr_t)1; // non-NULL sentinel, matching the declared return type
}
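
// Worked example (illustration only; the address is arbitrary): with 2 MiB
// segments and 64 KiB pages, a pointer 0x7f1234567890 resolves as
//   seg_base = addr & ~(2 MiB - 1) = 0x7f1234400000
//   page_idx = (addr - seg_base) >> 16 = 0x167890 >> 16 = 0x16 = 22
// and since a 2 MiB segment holds 2 MiB / 64 KiB = 32 pages, the bounds check
// requires 0 <= page_idx < num_pages (= 32 when fully populated).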

// -----------------------------------------------------------------------------
// alloc/free
// -----------------------------------------------------------------------------
static small_page_v4* small_alloc_slow_v4(small_heap_ctx_v4* ctx, int class_idx) {
    small_class_heap_v4* h = &ctx->cls[class_idx];
    const uint32_t partial_limit = v4_partial_limit(class_idx);
    small_page_v4* cur = h->current;
    if (cur && cur->freelist) {
        return cur; // usable current
    }
    if (cur && !cur->freelist) {
        // park the exhausted current on partial/full (prefer partial)
        if (h->partial_count < partial_limit) {
            v4_page_push_partial(h, cur);
        } else {
            v4_page_push_full(h, cur);
        }
        h->current = NULL;
    }
    // bring back exactly one page from partial
    small_page_v4* from_partial = v4_page_pop_partial(h);
    if (from_partial) {
        h->current = from_partial;
        return from_partial;
    }
    // Call the direct Cold function (not a vtable)
    small_page_v4* page = small_cold_v4_refill_page(ctx, (uint32_t)class_idx);
    if (!page) return NULL;
    h->current = page;
    return page;
}

void* small_heap_alloc_fast_v4(small_heap_ctx_v4* ctx, int class_idx) {
    // Phase v4-mid-5: stats instrumentation
    small_heap_v4_stat_alloc_call(class_idx);

    // Phase v4-mid-6: C6 Fastlist path
    if (class_idx == 6 && small_heap_v4_fastlist_enabled()) {
        SmallC6FastState* s = &g_small_c6_fast;
        if (likely(s->freelist)) {
            void* b = s->freelist;
            s->freelist = *(void**)b;
            s->used++;
            small_heap_v4_stat_alloc_success(class_idx);
            return tiny_region_id_write_header(b, class_idx);
        }
        // Fastlist empty: sync used back to meta before the slow path
        if (s->meta) {
            s->meta->used = (uint16_t)s->used;
        }
    }

    // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
    if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
        small_heap_v4_stat_alloc_fallback_pool(class_idx);
        return NULL; // classes other than C5/C6/C7 are unsupported
    }
    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) {
        small_heap_v4_stat_alloc_fallback_pool(class_idx);
        return NULL;
    }

    small_class_heap_v4* h = &ctx->cls[class_idx];
    small_page_v4* page = h->current;

    // Try the current page's freelist
    if (page && page->freelist) {
        void* blk = page->freelist;
        void* next = NULL;
        memcpy(&next, blk, sizeof(void*));
        page->freelist = next;
        page->used++;
        small_heap_v4_stat_alloc_success(class_idx);
        return tiny_region_id_write_header(blk, class_idx);
    }

    // Current exhausted or NULL: take the slow path (partial/refill)
    page = small_alloc_slow_v4(ctx, class_idx);
    if (!page || !page->freelist) {
        small_heap_v4_stat_alloc_null_page(class_idx);
        small_heap_v4_stat_alloc_fallback_pool(class_idx);
        return NULL;
    }

    // Phase v4-mid-6: promote the page to the C6 Fastlist
    // (page and page->freelist are guaranteed non-NULL here by the check above)
    if (class_idx == 6 && small_heap_v4_fastlist_enabled()) {
        SmallC6FastState* s = &g_small_c6_fast;
        s->meta = page;
        s->page_base = page->base;
        s->capacity = page->capacity;
        s->used = page->used;
        s->freelist = page->freelist;
        page->freelist = NULL; // steal freelist ownership
        // Retry the fast path
        void* b = s->freelist;
        s->freelist = *(void**)b;
        s->used++;
        small_heap_v4_stat_alloc_success(class_idx);
        return tiny_region_id_write_header(b, class_idx);
    }

    // Allocate from the newly acquired page
    void* blk = page->freelist;
    void* next = NULL;
    memcpy(&next, blk, sizeof(void*));
    page->freelist = next;
    page->used++;
    small_heap_v4_stat_alloc_success(class_idx);
    return tiny_region_id_write_header(blk, class_idx);
}
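
// Hypothetical helper (not in the original source): while a C6 page is
// promoted, the TLS fast state owns page->freelist (zeroed on the page) and
// s->used shadows page->used. A sketch of the flush that would hand both back
// to the page, e.g. before demoting the fastlist; the function name and guard
// macro are assumptions:
#ifdef SMALLOBJECT_V4_FASTLIST_DEMO
static void small_c6_fast_flush(SmallC6FastState* s) {
    if (!s || !s->meta) return;
    s->meta->freelist = s->freelist;   // return the stolen freelist
    s->meta->used = (uint16_t)s->used; // sync the shadowed counter back
    s->freelist = NULL;
    s->meta = NULL;
    s->page_base = NULL;
    s->capacity = 0;
    s->used = 0;
}
#endif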

static void v4_unlink_from_list(small_class_heap_v4* h, v4_loc_t loc,
                                small_page_v4* prev, small_page_v4* page) {
    if (!h || !page) return;
    switch (loc) {
        case V4_LOC_CURRENT:
            h->current = NULL;
            break;
        case V4_LOC_PARTIAL:
            if (prev) prev->next = page->next;
            else h->partial_head = page->next;
            if (h->partial_count > 0) {
                h->partial_count--;
            }
            break;
        case V4_LOC_FULL:
            if (prev) prev->next = page->next;
            else h->full_head = page->next;
            break;
        default:
            break;
    }
    page->next = NULL;
}

extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id);

void small_heap_free_fast_v4(small_heap_ctx_v4* ctx, int class_idx, void* ptr) {
    // Phase v4-mid-5: stats instrumentation
    small_heap_v4_stat_free_call(class_idx);

    // Phase v4-mid-6: C6 Fastlist path
    if (class_idx == 6 && small_heap_v4_fastlist_enabled()) {
        SmallC6FastState* s = &g_small_c6_fast;
        if (s->page_base && (uintptr_t)ptr >= (uintptr_t)s->page_base) {
            // Use the actual block size from meta
            uint32_t bsize = (s->meta) ? s->meta->block_size : 512;
            size_t span = (size_t)s->capacity * bsize;
            if ((uintptr_t)ptr < (uintptr_t)s->page_base + span) {
                *(void**)ptr = s->freelist;
                s->freelist = ptr;
                s->used--;
                small_heap_v4_stat_free_page_found(class_idx);
                return;
            }
        }
    }

    // Phase v4-mid-2: C6-only full SmallHeapCtx v4 implementation
    if (__builtin_expect(!v4_class_supported(class_idx), 0)) {
        return;
    }
    if (!small_heap_v4_class_enabled((uint8_t)class_idx)) return;
    if (!ptr) return;

    // Phase v4-mid-6: ptr is already BASE (the caller converts USER→BASE
    // before calling us). See malloc_tiny_fast.h L254: base = ptr - 1,
    // then L354/L282 passes base.
    void* base_ptr = ptr;

    small_class_heap_v4* h = &ctx->cls[class_idx];
    small_page_v4* prev = NULL;
    v4_loc_t loc = V4_LOC_NONE;

    // Try to find the page in the current/partial/full lists (using the BASE pointer)
    small_page_v4* page = v4_find_page(h, (const uint8_t*)base_ptr, &loc, &prev);

    // Phase v4-mid-2: if the page is not found in the v4 heap, page_meta_of()
    // segment lookup is the eventual fallback
    if (!page) {
        small_heap_v4_stat_free_page_not_found(class_idx);
        // A segment mask+shift lookup would go here (requires an initialized
        // segment); for now this path is reserved for future segment-based
        // allocation. Fall back to pool v1 (avoids recursion via free()).
        hak_pool_free(base_ptr, 0, 0);
        return;
    }
    small_heap_v4_stat_free_page_found(class_idx);

    const uint32_t partial_limit = v4_partial_limit(class_idx);

    // freelist push (use the BASE pointer, not the USER pointer)
    void* head = page->freelist;
    memcpy(base_ptr, &head, sizeof(void*));
    page->freelist = base_ptr;
    if (page->used > 0) {
        page->used--;
    }

    if (page->used == 0) {
        if (loc != V4_LOC_CURRENT) {
            v4_unlink_from_list(h, loc, prev, page);
        }
        if (!h->current) {
            h->current = page;
            page->next = NULL;
            return;
        }
        if (h->current == page) {
            page->next = NULL;
            return;
        }
        if (h->partial_count < partial_limit) {
            v4_page_push_partial(h, page);
            return;
        }
        // Call the direct Cold function (not a vtable)
        small_cold_v4_retire_page(ctx, page);
        return;
    }

    if (!h->current) {
        // install this page as current
        if (loc != V4_LOC_CURRENT) {
            v4_unlink_from_list(h, loc, prev, page);
        }
        h->current = page;
        page->next = NULL;
    } else if (loc == V4_LOC_FULL && page->freelist) {
        // move full → partial (restore up to the partial limit even while a current exists)
        v4_unlink_from_list(h, loc, prev, page);
        if (h->partial_count < partial_limit) {
            v4_page_push_partial(h, page);
        } else {
            v4_page_push_full(h, page); // push back to full when over the limit
        }
    }
}
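
// Note (inferred, not in the original source): per the BASE/USER comment in
// small_heap_free_fast_v4() and the malloc_tiny_fast.h reference, the alloc
// path returns tiny_region_id_write_header(base, class_idx), a USER pointer
// just past a one-byte region-id header, and callers compute base = user - 1
// before entering the free path. Freelist links therefore always thread
// through BASE pointers, never USER pointers.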

// ============================================================================
// Stats dump (Phase v4-mid-5)
// ============================================================================
void small_heap_v4_stats_dump(void) {
    if (!small_heap_v4_stats_enabled()) {
        return;
    }
    fprintf(stderr, "\n========================================\n");
    fprintf(stderr, "[SMALL_HEAP_V4_STATS] Summary\n");
    fprintf(stderr, "========================================\n");
    for (int c = 0; c < 8; c++) {
        uint64_t alloc_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_calls, memory_order_relaxed);
        uint64_t alloc_success = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_success, memory_order_relaxed);
        uint64_t alloc_null_page = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_null_page, memory_order_relaxed);
        uint64_t alloc_fallback = atomic_load_explicit(&g_small_heap_v4_stats[c].alloc_fallback_pool, memory_order_relaxed);
        uint64_t free_calls = atomic_load_explicit(&g_small_heap_v4_stats[c].free_calls, memory_order_relaxed);
        uint64_t free_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_found, memory_order_relaxed);
        uint64_t free_not_found = atomic_load_explicit(&g_small_heap_v4_stats[c].free_page_not_found, memory_order_relaxed);
        if (alloc_calls > 0 || free_calls > 0) {
            fprintf(stderr, "\nClass C%d:\n", c);
            fprintf(stderr, "  Alloc: calls=%lu success=%lu null_page=%lu fallback_pool=%lu\n",
                    (unsigned long)alloc_calls, (unsigned long)alloc_success,
                    (unsigned long)alloc_null_page, (unsigned long)alloc_fallback);
            fprintf(stderr, "  Free:  calls=%lu page_found=%lu page_not_found=%lu\n",
                    (unsigned long)free_calls, (unsigned long)free_found,
                    (unsigned long)free_not_found);
        }
    }
    fprintf(stderr, "========================================\n\n");
    fflush(stderr);
}

// Automatic dump at program exit
static void small_heap_v4_stats_atexit(void) __attribute__((destructor));
static void small_heap_v4_stats_atexit(void) {
    small_heap_v4_stats_dump();
}
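
// Usage sketch (illustration only; the guard macro and helper name are
// assumptions): besides the automatic destructor dump, a test can snapshot a
// single counter mid-run, e.g. to assert that C6 allocations stay on the v4
// path instead of falling back to the pool:
#ifdef SMALLOBJECT_V4_STATS_DEMO
static uint64_t small_heap_v4_c6_fallbacks(void) {
    return atomic_load_explicit(&g_small_heap_v4_stats[6].alloc_fallback_pool,
                                memory_order_relaxed);
}
#endif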