// smallobject_hotbox_v5.c - SmallObject HotBox v5 Full Implementation (Phase v5-2)
//
// Phase v5-2: C6-only full implementation with segment-based allocation

#include <stddef.h>   // size_t
#include <stdint.h>   // uint8_t, uint16_t, uint32_t, uintptr_t
#include <string.h>   // memcpy

#include "box/smallsegment_v5_box.h"
#include "box/smallobject_hotbox_v5_box.h"
#include "box/smallobject_cold_iface_v5.h"
#include "box/smallobject_v5_env_box.h"
#include "tiny_region_id.h"  // For HEADER_MAGIC and HEADER_CLASS_MASK

#ifndef likely
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

// TLS context
static __thread SmallHeapCtxV5 g_small_heap_ctx_v5;
static __thread int g_small_heap_ctx_v5_init = 0;

SmallHeapCtxV5* small_heap_ctx_v5(void) {
    // Phase v5-4/v5-5/v5-6/v5-7: Lazy initialization of cached ENV flags
    if (unlikely(!g_small_heap_ctx_v5_init)) {
        g_small_heap_ctx_v5.header_mode = (uint8_t)small_heap_v5_header_mode();
        g_small_heap_ctx_v5.tls_cache_enabled = small_heap_v5_tls_cache_enabled();
        g_small_heap_ctx_v5.c6_cached_block = NULL;  // Initialize cache to empty
        g_small_heap_ctx_v5.batch_enabled = small_heap_v5_batch_enabled();
        g_small_heap_ctx_v5.c6_batch.count = 0;      // Initialize batch to empty
        for (int i = 0; i < SMALL_V5_BATCH_CAP; i++) {
            g_small_heap_ctx_v5.c6_batch.slots[i] = NULL;
        }

        // Phase v5-7: ULTRA C6 initialization
        g_small_heap_ctx_v5.ultra_c6_enabled = small_heap_v5_ultra_c6_enabled();
        g_small_heap_ctx_v5.c6_tls_count = 0;
        for (int i = 0; i < SMALL_V5_ULTRA_C6_CAP; i++) {
            g_small_heap_ctx_v5.c6_tls_freelist[i] = NULL;
        }

        g_small_heap_ctx_v5_init = 1;
    }
    return &g_small_heap_ctx_v5;
}

// Forward declarations for pool v1 fallback
extern void* hak_pool_try_alloc(size_t size, uintptr_t site_id);
extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id);

// ============================================================================
// Helper: Slow path (refill from partial or cold)
// ============================================================================
static SmallPageMetaV5* alloc_slow_v5(SmallHeapCtxV5* ctx, uint32_t class_idx) {
    SmallClassHeapV5* h = &ctx->cls[class_idx];
    SmallPageMetaV5* cur = h->current;

    // If current exists but is exhausted, move it to the full list
    // (exhausted pages are fully allocated, not partially free)
    if (cur && !cur->free_list) {
        SMALL_PAGE_V5_PUSH_FULL(h, cur);
        h->current = NULL;
    }

    // Try to pop from the partial list (pages with some free blocks)
    SmallPageMetaV5* from_partial = SMALL_PAGE_V5_POP_PARTIAL(h);
    if (from_partial) {
        h->current = from_partial;
        return from_partial;
    }

    // Refill from the cold interface (allocates a new page)
    SmallPageMetaV5* page = small_cold_v5_refill_page(ctx, class_idx);
    if (!page) return NULL;
    h->current = page;
    return page;
}
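// Illustrative sketch (kept out of the build): the free lists below are
// intrusive -- each free block stores its "next" link in its first
// sizeof(void*) bytes, read and written with memcpy to stay alignment- and
// strict-aliasing-safe.  The helper names here are hypothetical and only
// demonstrate the pattern that the refill/alloc/free paths inline by hand.
#if 0
static void example_freelist_push(void** head, void* blk) {
    memcpy(blk, head, sizeof(void*));   // store the old head in blk's first bytes
    *head = blk;                        // blk becomes the new head
}

static void* example_freelist_pop(void** head) {
    void* blk = *head;
    if (!blk) return NULL;
    void* next;
    memcpy(&next, blk, sizeof(void*));  // read blk's stored "next" link
    *head = next;
    return blk;
}
#endif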
// ============================================================================
// Phase v5-7: C6 ULTRA slow path helpers
// ============================================================================

// ULTRA refill: Get blocks from the current page and fill the TLS freelist
// Optimized: batch the page->used update, minimal loop overhead
static void* small_alloc_slow_v5_c6_refill(SmallHeapCtxV5* ctx, uint32_t class_idx) {
    // Get a page from the existing slow path
    SmallPageMetaV5* page = alloc_slow_v5(ctx, class_idx);
    if (unlikely(!page || !page->free_list)) {
        // Cold refill failed, fall back to pool v1
        return hak_pool_try_alloc(SMALL_HEAP_V5_C6_BLOCK_SIZE, 0);
    }

    // Pre-compute header value
    const uint8_t desired_header = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));

    // Fill TLS freelist from page freelist (up to ULTRA_CAP)
    // Optimized: count filled blocks, batch update page->used at end
    int filled = 0;
    const int max_fill = SMALL_V5_ULTRA_C6_CAP - ctx->c6_tls_count;
    while (page->free_list && filled < max_fill) {
        void* blk = page->free_list;
        void* next;
        memcpy(&next, blk, sizeof(void*));
        page->free_list = next;

        // Write header (required because the freelist link overwrites it)
        *((uint8_t*)blk) = desired_header;

        ctx->c6_tls_freelist[ctx->c6_tls_count++] = blk;
        filled++;
    }

    // Batch update page->used (one write instead of N)
    page->used += (uint16_t)filled;

    if (unlikely(filled == 0)) {
        return hak_pool_try_alloc(SMALL_HEAP_V5_C6_BLOCK_SIZE, 0);
    }

    // Pop one and return (its header was already written above)
    void* ret = ctx->c6_tls_freelist[--ctx->c6_tls_count];
    return (uint8_t*)ret + 1;  // Return USER pointer
}

// ULTRA drain: Push half of the TLS freelist back to the owning page freelists
// Note: each drained block looks up its own page, since blocks may span pages;
// the page argument of the block currently being freed is not needed here.
static void small_free_slow_v5_c6_drain(void* base_ptr, SmallHeapCtxV5* ctx,
                                        SmallPageMetaV5* page) {
    (void)page;  // Currently unused (see note above)

    // Drain half of the TLS freelist to make room
    int drain_count = ctx->c6_tls_count / 2;
    if (drain_count < 1) drain_count = 1;

    // Drain blocks back to their pages
    // Note: All blocks in the TLS freelist likely belong to the same page (common case)
    for (int i = 0; i < drain_count; i++) {
        void* blk = ctx->c6_tls_freelist[--ctx->c6_tls_count];

        // blk is a BASE pointer, look up its page
        SmallPageMetaV5* blk_page = small_segment_v5_page_meta_of((uint8_t*)blk + 1);
        if (likely(blk_page)) {
            // Push as BASE pointer (next link at offset 0)
            void* head = blk_page->free_list;
            memcpy(blk, &head, sizeof(void*));
            blk_page->free_list = blk;
            blk_page->used--;  // Decrement used (no underflow check for speed)
        }
    }

    // Push the current block onto the TLS freelist
    ctx->c6_tls_freelist[ctx->c6_tls_count++] = base_ptr;
}
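/*
 * Block layout assumed by the fast paths below (a summary of the existing
 * convention, not a new format):
 *
 *     BASE            BASE + 1
 *     +---------------+------------------------------------------+
 *     | 1-byte header | user payload ...                         |
 *     +---------------+------------------------------------------+
 *
 * The header byte is HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK).
 * Callers see USER pointers (BASE + 1); the ULTRA TLS freelist, the
 * single-slot TLS cache and the batch slots all store BASE pointers.
 */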
// ============================================================================
// Phase v5-2: Fast alloc (C6-only full implementation)
// ============================================================================
void* small_alloc_fast_v5(size_t size, uint32_t class_idx, SmallHeapCtxV5* ctx) {
    (void)size;  // Not used in fast path

    // C6-only check
    if (unlikely(class_idx != SMALL_HEAP_V5_C6_CLASS_IDX)) {
        // Fallback to pool v1 for non-C6 classes
        return hak_pool_try_alloc(size, 0);
    }

    // Phase v5-7: ULTRA fast path (C6 only, minimal branches)
    if (ctx->ultra_c6_enabled) {
        uint8_t cnt = ctx->c6_tls_count;
        if (likely(cnt > 0)) {
            // ULTRA fast: pop from TLS freelist (header already written at refill)
            ctx->c6_tls_count = cnt - 1;
            return (uint8_t*)ctx->c6_tls_freelist[cnt - 1] + 1;  // Return USER pointer
        }
        // ULTRA slow: refill TLS freelist from page
        return small_alloc_slow_v5_c6_refill(ctx, class_idx);
    }

    // Phase v5-5: TLS cache hit path (C6 only)
    if (unlikely(ctx->tls_cache_enabled)) {
        void* cached = ctx->c6_cached_block;
        if (likely(cached != NULL)) {
            ctx->c6_cached_block = NULL;  // Consume cache slot

            // NOTE: cached is BASE pointer (same as freelist format), convert to USER pointer
            // This is consistent with the free path which stores (ptr - 1) as BASE

            // Header mode handling (same logic as freelist path)
            uint8_t* header_ptr = (uint8_t*)cached;
            uint8_t desired_header = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
            if (ctx->header_mode == SMALL_HEAP_V5_HEADER_MODE_LIGHT) {
                // light mode: only write if invalid
                uint8_t existing = *header_ptr;
                if (existing != desired_header) {
                    *header_ptr = desired_header;
                }
            } else {
                // full mode: always write header
                *header_ptr = desired_header;
            }
            return header_ptr + 1;
        }
    }

    // Phase v5-6: Batch alloc path (C6 only, after cache)
    if (ctx->batch_enabled && class_idx == SMALL_HEAP_V5_C6_CLASS_IDX && ctx->c6_batch.count > 0) {
        uint8_t idx = --ctx->c6_batch.count;
        void* b = ctx->c6_batch.slots[idx];
        ctx->c6_batch.slots[idx] = NULL;

        // b is BASE pointer, return based on header mode
        if (ctx->header_mode == SMALL_HEAP_V5_HEADER_MODE_LIGHT) {
            return (uint8_t*)b + 1;
        } else {
            // full mode: write header
            uint8_t* header_ptr = (uint8_t*)b;
            uint8_t desired_header = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
            *header_ptr = desired_header;
            return header_ptr + 1;
        }
    }

    // Cache miss - proceed to existing page_meta path
    SmallClassHeapV5* h = &ctx->cls[SMALL_HEAP_V5_C6_CLASS_IDX];
    SmallPageMetaV5* page = h->current;

    // Fast path: Try current page freelist
    if (likely(page && page->free_list)) {
        void* blk = page->free_list;
        void* next = NULL;
        memcpy(&next, blk, sizeof(void*));
        page->free_list = next;
        page->used++;

        // Phase v5-4: Header mode handling
        uint8_t* header_ptr = (uint8_t*)blk;
        uint8_t desired_header = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
        if (ctx->header_mode == SMALL_HEAP_V5_HEADER_MODE_LIGHT) {
            // light mode: only write header if it's invalid/incorrect
            // This saves redundant writes when blocks are recycled
            uint8_t existing = *header_ptr;
            if (existing != desired_header) {
                *header_ptr = desired_header;
            }
        } else {
            // full mode: always write header (safety first)
            *header_ptr = desired_header;
        }
        return header_ptr + 1;
    }

    // Slow path: Current exhausted or NULL
    page = alloc_slow_v5(ctx, class_idx);
    if (unlikely(!page || !page->free_list)) {
        // Cold refill failed, fallback to pool v1
        return hak_pool_try_alloc(size, 0);
    }

    // Allocate from newly acquired page
    void* blk = page->free_list;
    void* next = NULL;
    memcpy(&next, blk, sizeof(void*));
    page->free_list = next;
    page->used++;

    // Phase v5-4: Header mode handling (same logic as fast path)
    uint8_t* header_ptr = (uint8_t*)blk;
    uint8_t desired_header = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
    if (ctx->header_mode == SMALL_HEAP_V5_HEADER_MODE_LIGHT) {
        // light mode: only write if invalid
        uint8_t existing = *header_ptr;
        if (existing != desired_header) {
            *header_ptr = desired_header;
        }
    } else {
        // full mode: always write header
        *header_ptr = desired_header;
    }
    return header_ptr + 1;
}
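// Usage sketch (kept out of the build): how an upstream dispatcher might drive
// the C6 fast paths.  The actual routing rules (size-to-class mapping, how
// class_idx is recovered on free) live outside this file, so everything in
// example_c6_roundtrip() is an assumption for illustration only.
#if 0
static void example_c6_roundtrip(void) {
    SmallHeapCtxV5* ctx = small_heap_ctx_v5();        // TLS context, lazy ENV init
    size_t sz = small_heap_v5_c6_block_size();        // C6 block size
    void* p = small_alloc_fast_v5(sz, SMALL_HEAP_V5_C6_CLASS_IDX, ctx);
    if (p) {
        // ... use p (a USER pointer; its 1-byte header sits at p - 1) ...
        small_free_fast_v5(p, SMALL_HEAP_V5_C6_CLASS_IDX, ctx);
    }
}
#endif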
// ============================================================================
// Helper: Determine page location in heap lists (Phase v5-3)
// ============================================================================
static inline page_loc_t get_page_location(SmallClassHeapV5* h, SmallPageMetaV5* page,
                                           SmallPageMetaV5** prev_out) {
    if (prev_out) *prev_out = NULL;
    if (!h || !page) return LOC_NONE;

    // Check current (O(1))
    if (h->current == page) {
        return LOC_CURRENT;
    }

    // Check partial list (typically 0-1 pages in v5-3)
    SmallPageMetaV5* prev = NULL;
    for (SmallPageMetaV5* p = h->partial_head; p; prev = p, p = p->next) {
        if (p == page) {
            if (prev_out) *prev_out = prev;
            return LOC_PARTIAL;
        }
    }

    // Check full list
    prev = NULL;
    for (SmallPageMetaV5* p = h->full_head; p; prev = p, p = p->next) {
        if (p == page) {
            if (prev_out) *prev_out = prev;
            return LOC_FULL;
        }
    }

    return LOC_NONE;
}

// ============================================================================
// Phase v5-7: Lightweight segment check (faster than page_meta_of)
// ============================================================================
// Import from smallsegment_v5.c
extern int small_segment_v5_owns_ptr_fast(void* ptr);

// ============================================================================
// Phase v5-3: Fast free (C6-only O(1) implementation)
// ============================================================================
void small_free_fast_v5(void* ptr, uint32_t class_idx, SmallHeapCtxV5* ctx) {
    if (unlikely(!ptr)) {
        return;
    }

    // C6-only check
    if (unlikely(class_idx != SMALL_HEAP_V5_C6_CLASS_IDX)) {
        hak_pool_free(ptr, 0, 0);
        return;
    }

    // Phase v5-7: ULTRA free path - skip page_meta_of on the fast path
    if (ctx->ultra_c6_enabled) {
        // Quick segment ownership check (no page_meta access)
        if (likely(small_segment_v5_owns_ptr_fast(ptr))) {
            uint8_t cnt = ctx->c6_tls_count;
            if (likely(cnt < SMALL_V5_ULTRA_C6_CAP)) {
                // ULTRA fast: push onto the TLS freelist (no page_meta touch)
                ctx->c6_tls_freelist[cnt] = (uint8_t*)ptr - 1;  // Store BASE
                ctx->c6_tls_count = cnt + 1;
                return;
            }
            // ULTRA slow: need page_meta for the drain
            SmallPageMetaV5* page = small_segment_v5_page_meta_of(ptr);
            if (page) {
                small_free_slow_v5_c6_drain((uint8_t*)ptr - 1, ctx, page);
                return;
            }
        }
        // Not in a v5 segment, fall back to pool v1
        hak_pool_free(ptr, 0, 0);
        return;
    }

    // Non-ULTRA path: need page_meta_of
    SmallPageMetaV5* page = small_segment_v5_page_meta_of(ptr);
    if (unlikely(!page)) {
        // Not in a v5 segment, fall back to pool v1
        hak_pool_free(ptr, 0, 0);
        return;
    }

    SmallClassHeapV5* h = &ctx->cls[SMALL_HEAP_V5_C6_CLASS_IDX];

    // Phase v5-5: TLS cache refill path (before pushing to the freelist)
    if (unlikely(ctx->tls_cache_enabled)) {
        if (ctx->c6_cached_block == NULL) {
            // Cache is empty, refill it with this block.
            // NOTE: ptr is a USER pointer; convert to a BASE pointer for cache storage
            // (consistent with the freelist storage format).
            void* base = (uint8_t*)ptr - 1;
            ctx->c6_cached_block = base;
            // IMPORTANT: Do NOT decrement page->used here!
            // The cached block is still logically "allocated" until it is:
            //   - consumed during alloc (at which point it becomes allocated again), or
            //   - evicted to the freelist (at which point page->used is decremented).
            // This prevents premature page retirement while holding a cached reference.
            return;
        } else {
            // Cache full - evict the cached block to its freelist first, then cache this one
            void* evicted = ctx->c6_cached_block;
            // The evicted block is a BASE pointer; convert to a USER pointer for the freelist push
            void* evicted_user = (uint8_t*)evicted + 1;

            // Look up the page for the evicted block (might differ from the current page)
            SmallPageMetaV5* evicted_page = small_segment_v5_page_meta_of(evicted_user);
            if (evicted_page) {
                // Push the evicted block onto its page's freelist
                void* evicted_head = evicted_page->free_list;
                memcpy(evicted_user, &evicted_head, sizeof(void*));
                evicted_page->free_list = evicted_user;
                if (evicted_page->used > 0) {
                    evicted_page->used--;
                }
                // Note: We don't handle the empty-page transition for the evicted page here,
                // to keep this path fast.  Empty pages are handled on the next alloc/free.
            }

            // Now cache the new block
            void* base = (uint8_t*)ptr - 1;
            ctx->c6_cached_block = base;
            return;
        }
    }

    // Phase v5-6: Batch free path (C6 only, after the cache, before the freelist)
    SmallV5Batch* batch = &ctx->c6_batch;
    if (ctx->batch_enabled && class_idx == SMALL_HEAP_V5_C6_CLASS_IDX &&
        batch->count < SMALL_V5_BATCH_CAP) {
        // ptr is a USER pointer; convert to a BASE pointer for batch storage
        void* base = (uint8_t*)ptr - 1;
        batch->slots[batch->count++] = base;
        return;
    }

    // Cache disabled or batch full - push onto the freelist (standard path)
    void* head = page->free_list;
    memcpy(ptr, &head, sizeof(void*));
    page->free_list = ptr;
    if (page->used > 0) {
        page->used--;
    }

    // Handle empty page (used == 0)
    if (page->used == 0) {
        // Fast path: if this is the current page, just keep it
        if (h->current == page) {
            return;
        }

        // Determine location and unlink (rare path)
        SmallPageMetaV5* prev = NULL;
        page_loc_t loc = get_page_location(h, page, &prev);
        if (loc != LOC_NONE && loc != LOC_CURRENT) {
            SMALL_PAGE_V5_UNLINK(h, loc, prev, page);
        }

        // No current page: promote this empty page to current
        if (!h->current) {
            h->current = page;
            page->next = NULL;
            return;
        }

        // Try to park it in partial (bounded by SMALL_HEAP_V5_C6_PARTIAL_LIMIT)
        if (h->partial_count < SMALL_HEAP_V5_C6_PARTIAL_LIMIT) {
            SMALL_PAGE_V5_PUSH_PARTIAL(h, page);
            return;
        }

        // Retire to cold
        small_cold_v5_retire_page(ctx, page);
        return;
    }

    // Page not empty - handle full→partial transition
    if (h->current != page) {
        SmallPageMetaV5* prev = NULL;
        page_loc_t loc = get_page_location(h, page, &prev);
        if (loc == LOC_FULL && page->free_list) {
            // Move from full to partial
            SMALL_PAGE_V5_UNLINK(h, loc, prev, page);
            if (h->partial_count < SMALL_HEAP_V5_C6_PARTIAL_LIMIT) {
                SMALL_PAGE_V5_PUSH_PARTIAL(h, page);
            } else {
                SMALL_PAGE_V5_PUSH_FULL(h, page);
            }
        } else if (!h->current) {
            // No current page, promote this one
            if (loc != LOC_NONE) {
                SMALL_PAGE_V5_UNLINK(h, loc, prev, page);
            }
            h->current = page;
            page->next = NULL;
        }
    }
}
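/*
 * Page lifecycle for the C6 class, as implemented by alloc_slow_v5() and
 * small_free_fast_v5() above:
 *
 *   cold refill -> current : small_cold_v5_refill_page() installs a new page
 *   current -> full        : current page exhausted on the alloc slow path
 *   partial -> current     : popped by alloc_slow_v5() when current runs dry
 *   full -> partial        : a free lands in a full page that regains blocks
 *                            (bounded by SMALL_HEAP_V5_C6_PARTIAL_LIMIT)
 *   empty page             : kept/promoted as current, parked in partial,
 *                            or retired via small_cold_v5_retire_page()
 */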
// ============================================================================
// Helper: C6 block size query
// ============================================================================
uint32_t small_heap_v5_c6_block_size(void) {
    return SMALL_HEAP_V5_C6_BLOCK_SIZE;
}
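// Usage sketch (kept out of the build): a caller-side size check before routing
// a request to the C6 fast path.  The real dispatcher lives outside this file;
// example_fits_c6() is a hypothetical helper shown only for illustration.
#if 0
static int example_fits_c6(size_t size) {
    return size > 0 && size <= small_heap_v5_c6_block_size();
}
#endif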