// smallobject_hotbox_v3.c - SmallObject HotHeap v3 skeleton (C7-first)
// Phase A/B: types and stats only; alloc/free fall back to v1.
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "box/smallobject_hotbox_v3_box.h"
#include "box/smallobject_cold_iface_v1.h"
#include "box/tiny_heap_box.h"
#include "box/tiny_front_v3_env_box.h"
#include "hakmem_tiny.h" // TINY_SLAB_SIZE mask for page_of
#include "tiny_region_id.h"

static __thread so_ctx_v3* g_so_ctx_v3;
static int g_so_stats_enabled = -1;
static so_stats_class_v3 g_so_stats[SMALLOBJECT_NUM_CLASSES];

int so_v3_stats_enabled(void) {
    if (__builtin_expect(g_so_stats_enabled == -1, 0)) {
        const char* e = getenv("HAKMEM_SMALL_HEAP_V3_STATS");
        g_so_stats_enabled = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_so_stats_enabled;
}

static inline so_stats_class_v3* so_stats_for(uint8_t ci) {
    if (!so_v3_stats_enabled()) return NULL;
    if (ci >= SMALLOBJECT_NUM_CLASSES) return NULL;
    return &g_so_stats[ci];
}

void so_v3_record_route_hit(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->route_hits, 1, memory_order_relaxed);
}

void so_v3_record_alloc_call(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->alloc_calls, 1, memory_order_relaxed);
}

void so_v3_record_alloc_refill(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->alloc_refill, 1, memory_order_relaxed);
}

void so_v3_record_alloc_fallback(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->alloc_fallback_v1, 1, memory_order_relaxed);
}

void so_v3_record_alloc_current_hit(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->alloc_current_hit, 1, memory_order_relaxed);
}

void so_v3_record_alloc_partial_hit(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->alloc_partial_hit, 1, memory_order_relaxed);
}

void so_v3_record_free_call(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->free_calls, 1, memory_order_relaxed);
}

void so_v3_record_free_fallback(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->free_fallback_v1, 1, memory_order_relaxed);
}

void so_v3_record_free_current(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->free_current, 1, memory_order_relaxed);
}

void so_v3_record_free_partial(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->free_partial, 1, memory_order_relaxed);
}

void so_v3_record_free_retire(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->free_retire, 1, memory_order_relaxed);
}

void so_v3_record_page_of_fail(uint8_t ci) {
    so_stats_class_v3* st = so_stats_for(ci);
    if (st) atomic_fetch_add_explicit(&st->page_of_fail, 1, memory_order_relaxed);
}

so_ctx_v3* so_tls_get(void) {
    so_ctx_v3* ctx = g_so_ctx_v3;
    if (__builtin_expect(ctx == NULL, 0)) {
        ctx = (so_ctx_v3*)calloc(1, sizeof(so_ctx_v3));
        if (!ctx) {
            fprintf(stderr, "[SMALL_HEAP_V3] TLS alloc failed\n");
            abort();
        }
        for (int i = 0; i < SMALLOBJECT_NUM_CLASSES; i++) {
            so_class_v3* hc = &ctx->cls[i];
            hc->block_size = (uint32_t)tiny_stride_for_class(i);
            hc->max_partial_pages = 2;
        }
        g_so_ctx_v3 = ctx;
    }
    return ctx;
}
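/*
 * Enabling the stats (sketch; assumes the usual libhakmem build and runtime):
 *
 *   $ HAKMEM_SMALL_HEAP_V3_STATS=1 ./your_app
 *
 * Any non-empty value other than "0" turns the counters on, and
 * so_v3_stats_dump() at the bottom of this file prints the per-class
 * [SMALL_HEAP_V3_STATS] lines to stderr at process exit.
 */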
// Thread the page's blocks into a LIFO freelist; block_size is the stride.
static inline void* so_build_freelist(so_page_v3* page) {
    if (!page || !page->base || page->block_size == 0 || page->capacity == 0) return NULL;
    uint8_t* base = (uint8_t*)page->base;
    void* head = NULL;
    for (uint32_t i = 0; i < page->capacity; i++) {
        uint8_t* blk = base + ((size_t)i * page->block_size);
        *(void**)blk = head;
        head = blk;
    }
    return head;
}

static inline int so_ptr_in_page(so_page_v3* page, void* ptr) {
    if (!page || !ptr) return 0;
    uintptr_t base = (uintptr_t)page->base;
    uintptr_t p = (uintptr_t)ptr;
    uintptr_t span = (uintptr_t)page->block_size * (uintptr_t)page->capacity;
    if (p < base || p >= base + span) return 0;
    if (((p - base) % page->block_size) != 0) return 0;
    return 1;
}

static inline so_page_v3* so_page_of(so_class_v3* hc, void* ptr) {
    if (!ptr || !hc) return NULL;
    so_page_v3* page = hc->current;
    if (page && so_ptr_in_page(page, ptr)) {
        return page;
    }
    page = hc->partial;
    while (page) {
        if (so_ptr_in_page(page, ptr)) {
            return page;
        }
        page = page->next;
    }
    return NULL;
}

static inline void so_page_push_partial(so_class_v3* hc, so_page_v3* page) {
    if (!hc || !page) return;
    page->next = hc->partial;
    hc->partial = page;
    hc->partial_count++;
}

static inline void so_page_retire_slow(so_ctx_v3* ctx, uint32_t ci, so_page_v3* page);

static inline void* so_alloc_fast(so_ctx_v3* ctx, uint32_t ci) {
    so_class_v3* hc = &ctx->cls[ci];
    const bool skip_header_c7 = (ci == 7) && tiny_header_v3_enabled() && tiny_header_v3_skip_c7();
    so_page_v3* p = hc->current;
    if (p && p->freelist && p->used < p->capacity) {
        void* blk = p->freelist;
        p->freelist = *(void**)blk;
        p->used++;
        so_v3_record_alloc_current_hit((uint8_t)ci);
        if (skip_header_c7) {
            uint8_t* header_ptr = (uint8_t*)blk;
            *header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
            return header_ptr + 1; // mirror tiny_region_id_write_header fast path
        }
        return tiny_region_id_write_header(blk, (int)ci);
    }
    if (hc->partial) {
        so_page_v3* old_cur = hc->current;
        p = hc->partial;
        hc->partial = p->next;
        if (hc->partial_count > 0) {
            hc->partial_count--;
        }
        p->next = NULL;
        hc->current = p;
        if (old_cur && old_cur != p) {
            if (hc->partial_count < hc->max_partial_pages) {
                so_page_push_partial(hc, old_cur);
            } else {
                so_page_retire_slow(ctx, ci, old_cur);
            }
        }
        if (p->freelist && p->used < p->capacity) {
            void* blk = p->freelist;
            p->freelist = *(void**)blk;
            p->used++;
            so_v3_record_alloc_partial_hit((uint8_t)ci);
            if (skip_header_c7) {
                uint8_t* header_ptr = (uint8_t*)blk;
                *header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
                return header_ptr + 1;
            }
            return tiny_region_id_write_header(blk, (int)ci);
        }
    }
    return NULL;
}

static inline int so_unlink_partial(so_class_v3* hc, so_page_v3* target) {
    if (!hc || !target) return 0;
    so_page_v3* prev = NULL;
    so_page_v3* cur = hc->partial;
    while (cur) {
        if (cur == target) {
            if (prev) {
                prev->next = cur->next;
            } else {
                hc->partial = cur->next;
            }
            if (hc->partial_count > 0) {
                hc->partial_count--;
            }
            return 1;
        }
        prev = cur;
        cur = cur->next;
    }
    return 0;
}

static inline void so_page_retire_slow(so_ctx_v3* ctx, uint32_t ci, so_page_v3* page) {
    SmallObjectColdIface cold = smallobject_cold_iface_v1();
    void* cold_ctx = (void*)tiny_heap_ctx_for_thread();
    if (cold.retire_page) {
        cold.retire_page(cold_ctx, ci, page);
    } else {
        free(page);
    }
    (void)ctx;
}
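/*
 * Cold-interface contract, as exercised above and in so_alloc_refill_slow
 * (sketch only; the stub below is a hypothetical test double, not the real
 * v1 backend):
 *
 *   static so_page_v3* stub_refill(void* cold_ctx, uint32_t ci) {
 *       (void)cold_ctx;
 *       so_page_v3* pg = calloc(1, sizeof(*pg));
 *       if (!pg) return NULL;
 *       pg->capacity = 64; // illustrative capacity
 *       pg->base = malloc((size_t)tiny_stride_for_class((int)ci) * pg->capacity);
 *       if (!pg->base) { free(pg); return NULL; }
 *       return pg; // block_size == 0 is filled in by the hot box
 *   }
 *
 * refill_page may leave block_size at 0 (so_alloc_refill_slow derives it from
 * tiny_stride_for_class), and retire_page receives back exactly the
 * so_page_v3 it handed out.
 */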
static inline void so_free_fast(so_ctx_v3* ctx, uint32_t ci, void* ptr) {
    so_class_v3* hc = &ctx->cls[ci];
    so_page_v3* page = so_page_of(hc, ptr);
    if (!page) {
        // Not one of our pages: count it and hand the pointer to the v1 path.
        so_v3_record_page_of_fail((uint8_t)ci);
        so_v3_record_free_fallback((uint8_t)ci);
        tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), (int)ci, ptr);
        return;
    }
    *(void**)ptr = page->freelist;
    page->freelist = ptr;
    if (page->used > 0) {
        page->used--;
    }
    if (page->used == 0) {
        // Fully free page: keep it as current or as a bounded partial, else
        // retire it. A page must never be linked as current and partial at
        // the same time, or a later push would corrupt the partial list.
        if (hc->current == page) {
            // Already the active page; keep it for the next alloc.
            so_v3_record_free_current((uint8_t)ci);
        } else if (!hc->current) {
            (void)so_unlink_partial(hc, page);
            hc->current = page;
            so_v3_record_free_current((uint8_t)ci);
        } else {
            (void)so_unlink_partial(hc, page);
            if (hc->partial_count < hc->max_partial_pages) {
                so_page_push_partial(hc, page);
                so_v3_record_free_partial((uint8_t)ci);
            } else {
                so_v3_record_free_retire((uint8_t)ci);
                so_page_retire_slow(ctx, ci, page);
            }
        }
    } else if (!hc->current) {
        (void)so_unlink_partial(hc, page);
        hc->current = page;
        so_v3_record_free_current((uint8_t)ci);
    }
}

static inline so_page_v3* so_alloc_refill_slow(so_ctx_v3* ctx, uint32_t ci) {
    SmallObjectColdIface cold = smallobject_cold_iface_v1();
    void* cold_ctx = (void*)tiny_heap_ctx_for_thread();
    if (!cold.refill_page) return NULL;
    so_page_v3* page = cold.refill_page(cold_ctx, ci);
    if (!page) return NULL;
    if (!page->base || page->capacity == 0) {
        if (cold.retire_page) {
            cold.retire_page(cold_ctx, ci, page);
        } else {
            free(page);
        }
        return NULL;
    }
    if (page->block_size == 0) {
        page->block_size = (uint32_t)tiny_stride_for_class((int)ci);
    }
    page->class_idx = ci;
    page->used = 0;
    page->freelist = so_build_freelist(page);
    if (!page->freelist) {
        if (cold.retire_page) {
            cold.retire_page(cold_ctx, ci, page);
        } else {
            free(page);
        }
        return NULL;
    }
    page->next = NULL;
    so_class_v3* hc = &ctx->cls[ci];
    if (hc->current) {
        if (hc->partial_count < hc->max_partial_pages) {
            so_page_push_partial(hc, hc->current);
        } else {
            so_page_retire_slow(ctx, ci, hc->current);
        }
    }
    hc->current = page;
    return page;
}

void* so_alloc(uint32_t class_idx) {
    if (__builtin_expect(class_idx >= SMALLOBJECT_NUM_CLASSES, 0)) {
        return NULL;
    }
    so_v3_record_route_hit((uint8_t)class_idx);
    so_v3_record_alloc_call((uint8_t)class_idx);
    so_ctx_v3* ctx = so_tls_get();
    void* blk = so_alloc_fast(ctx, class_idx);
    if (blk) return blk;
    so_page_v3* page = so_alloc_refill_slow(ctx, class_idx);
    if (!page) {
        so_v3_record_alloc_fallback((uint8_t)class_idx);
        return NULL; // caller falls back to the v1 allocator
    }
    so_v3_record_alloc_refill((uint8_t)class_idx);
    blk = so_alloc_fast(ctx, class_idx);
    if (!blk) {
        so_v3_record_alloc_fallback((uint8_t)class_idx);
    }
    return blk;
}

void so_free(uint32_t class_idx, void* ptr) {
    if (__builtin_expect(class_idx >= SMALLOBJECT_NUM_CLASSES, 0)) {
        return;
    }
    so_v3_record_free_call((uint8_t)class_idx);
    so_ctx_v3* ctx = so_tls_get();
    so_free_fast(ctx, class_idx, ptr);
}

int smallobject_hotbox_v3_can_own_c7(void* ptr) {
    if (!ptr) return 0;
    if (!small_heap_v3_c7_enabled()) return 0;
    so_ctx_v3* ctx = g_so_ctx_v3;
    if (!ctx) return 0; // no ownership while TLS is uninitialized
    so_class_v3* hc = &ctx->cls[7];
    so_page_v3* page = so_page_of(hc, ptr);
    if (!page) return 0;
    if (page->class_idx != 7) return 0;
    return 1;
}
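/*
 * Call-site sketch (illustration; the real routing lives in the v3 front, and
 * v1_fallback_alloc is a hypothetical stand-in for the v1 path):
 *
 *   void* p = so_alloc(7);             // C7 block, header byte already written
 *   if (!p) p = v1_fallback_alloc(7);  // NULL means "fall back to v1"
 *   ...
 *   so_free(7, p);
 */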
__attribute__((destructor)) static void so_v3_stats_dump(void) {
    if (!so_v3_stats_enabled()) return;
    for (int i = 0; i < SMALLOBJECT_NUM_CLASSES; i++) {
        so_stats_class_v3* st = &g_so_stats[i];
        uint64_t rh   = atomic_load_explicit(&st->route_hits, memory_order_relaxed);
        uint64_t ac   = atomic_load_explicit(&st->alloc_calls, memory_order_relaxed);
        uint64_t ach  = atomic_load_explicit(&st->alloc_current_hit, memory_order_relaxed);
        uint64_t aph  = atomic_load_explicit(&st->alloc_partial_hit, memory_order_relaxed);
        uint64_t ar   = atomic_load_explicit(&st->alloc_refill, memory_order_relaxed);
        uint64_t afb  = atomic_load_explicit(&st->alloc_fallback_v1, memory_order_relaxed);
        uint64_t fc   = atomic_load_explicit(&st->free_calls, memory_order_relaxed);
        uint64_t fcur = atomic_load_explicit(&st->free_current, memory_order_relaxed);
        uint64_t fpar = atomic_load_explicit(&st->free_partial, memory_order_relaxed);
        uint64_t fret = atomic_load_explicit(&st->free_retire, memory_order_relaxed);
        uint64_t ffb  = atomic_load_explicit(&st->free_fallback_v1, memory_order_relaxed);
        uint64_t pof  = atomic_load_explicit(&st->page_of_fail, memory_order_relaxed);
        if (rh + ac + afb + fc + ffb + ar + pof + ach + aph + fcur + fpar + fret == 0) continue;
        // Main stats (basic)
        fprintf(stderr,
                "[SMALL_HEAP_V3_STATS] cls=%d route_hits=%llu alloc_calls=%llu "
                "alloc_refill=%llu alloc_fb_v1=%llu free_calls=%llu free_fb_v1=%llu "
                "page_of_fail=%llu\n",
                i, (unsigned long long)rh, (unsigned long long)ac,
                (unsigned long long)ar, (unsigned long long)afb,
                (unsigned long long)fc, (unsigned long long)ffb,
                (unsigned long long)pof);
        // Detailed alloc path breakdown
        if (ach + aph > 0) {
            fprintf(stderr, "  [ALLOC_DETAIL] alloc_current_hit=%llu alloc_partial_hit=%llu\n",
                    (unsigned long long)ach, (unsigned long long)aph);
        }
        // Detailed free path breakdown
        if (fcur + fpar + fret > 0) {
            fprintf(stderr, "  [FREE_DETAIL] free_current=%llu free_partial=%llu free_retire=%llu\n",
                    (unsigned long long)fcur, (unsigned long long)fpar,
                    (unsigned long long)fret);
        }
    }
}
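/*
 * Example stderr output with HAKMEM_SMALL_HEAP_V3_STATS=1 (values are
 * illustrative; each record is a single line, wrapped here for readability):
 *
 *   [SMALL_HEAP_V3_STATS] cls=7 route_hits=4096 alloc_calls=4096
 *       alloc_refill=8 alloc_fb_v1=0 free_calls=4096 free_fb_v1=0 page_of_fail=0
 *     [ALLOC_DETAIL] alloc_current_hit=4080 alloc_partial_hit=8
 *     [FREE_DETAIL] free_current=6 free_partial=2 free_retire=0
 */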